From 1ed9c0071a320dae3f5bf555e4bc7a230b58a71b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 23 Feb 2022 00:06:28 +0100 Subject: [PATCH] Add C versions as fallback --- lapack-netlib/SRC/dbbcsd.c | 1676 +++++++++++++ lapack-netlib/SRC/dbdsdc.c | 969 ++++++++ lapack-netlib/SRC/dbdsqr.c | 1397 +++++++++++ lapack-netlib/SRC/dbdsvdx.c | 1349 ++++++++++ lapack-netlib/SRC/dcombssq.c | 486 ++++ lapack-netlib/SRC/ddisna.c | 651 +++++ lapack-netlib/SRC/dgbbrd.c | 1033 ++++++++ lapack-netlib/SRC/dgbcon.c | 725 ++++++ lapack-netlib/SRC/dgbequ.c | 764 ++++++ lapack-netlib/SRC/dgbequb.c | 783 ++++++ lapack-netlib/SRC/dgbrfs.c | 919 +++++++ lapack-netlib/SRC/dgbrfsx.c | 1181 +++++++++ lapack-netlib/SRC/dgbsv.c | 622 +++++ lapack-netlib/SRC/dgbsvx.c | 1138 +++++++++ lapack-netlib/SRC/dgbsvxx.c | 1249 ++++++++++ lapack-netlib/SRC/dgbtf2.c | 698 ++++++ lapack-netlib/SRC/dgbtrf.c | 1021 ++++++++ lapack-netlib/SRC/dgbtrs.c | 686 ++++++ lapack-netlib/SRC/dgebak.c | 675 +++++ lapack-netlib/SRC/dgebal.c | 840 +++++++ lapack-netlib/SRC/dgebd2.c | 745 ++++++ lapack-netlib/SRC/dgebrd.c | 784 ++++++ lapack-netlib/SRC/dgecon.c | 658 +++++ lapack-netlib/SRC/dgeequ.c | 733 ++++++ lapack-netlib/SRC/dgeequb.c | 753 ++++++ lapack-netlib/SRC/dgees.c | 1002 ++++++++ lapack-netlib/SRC/dgeesx.c | 1124 +++++++++ lapack-netlib/SRC/dgeev.c | 1039 ++++++++ lapack-netlib/SRC/dgeevx.c | 1206 +++++++++ lapack-netlib/SRC/dgehd2.c | 628 +++++ lapack-netlib/SRC/dgehrd.c | 789 ++++++ lapack-netlib/SRC/dgejsv.c | 2682 ++++++++++++++++++++ lapack-netlib/SRC/dgelq.c | 745 ++++++ lapack-netlib/SRC/dgelq2.c | 597 +++++ lapack-netlib/SRC/dgelqf.c | 700 ++++++ lapack-netlib/SRC/dgelqt.c | 621 +++++ lapack-netlib/SRC/dgelqt3.c | 679 ++++++ lapack-netlib/SRC/dgels.c | 956 ++++++++ lapack-netlib/SRC/dgelsd.c | 1153 +++++++++ lapack-netlib/SRC/dgelss.c | 1318 ++++++++++ lapack-netlib/SRC/dgelsy.c | 945 +++++++ lapack-netlib/SRC/dgemlq.c | 684 ++++++ lapack-netlib/SRC/dgemlqt.c | 707 ++++++ lapack-netlib/SRC/dgemqr.c | 685 ++++++ lapack-netlib/SRC/dgemqrt.c | 708 ++++++ lapack-netlib/SRC/dgeql2.c | 591 +++++ lapack-netlib/SRC/dgeqlf.c | 711 ++++++ lapack-netlib/SRC/dgeqp3.c | 795 ++++++ lapack-netlib/SRC/dgeqr.c | 735 ++++++ lapack-netlib/SRC/dgeqr2.c | 602 +++++ lapack-netlib/SRC/dgeqr2p.c | 607 +++++ lapack-netlib/SRC/dgeqrf.c | 702 ++++++ lapack-netlib/SRC/dgeqrfp.c | 705 ++++++ lapack-netlib/SRC/dgeqrt.c | 631 +++++ lapack-netlib/SRC/dgeqrt2.c | 648 +++++ lapack-netlib/SRC/dgeqrt3.c | 681 ++++++ lapack-netlib/SRC/dgerfs.c | 882 +++++++ lapack-netlib/SRC/dgerfsx.c | 1148 +++++++++ lapack-netlib/SRC/dgerq2.c | 587 +++++ lapack-netlib/SRC/dgerqf.c | 710 ++++++ lapack-netlib/SRC/dgesc2.c | 604 +++++ lapack-netlib/SRC/dgesdd.c | 2167 ++++++++++++++++ lapack-netlib/SRC/dgesv.c | 574 +++++ lapack-netlib/SRC/dgesvd.c | 4475 ++++++++++++++++++++++++++++++++++ lapack-netlib/SRC/dgesvdq.c | 2128 ++++++++++++++++ lapack-netlib/SRC/dgesvdx.c | 1342 ++++++++++ lapack-netlib/SRC/dgesvj.c | 2233 +++++++++++++++++ lapack-netlib/SRC/dgesvx.c | 1068 ++++++++ lapack-netlib/SRC/dgesvxx.c | 1211 +++++++++ lapack-netlib/SRC/dgetc2.c | 646 +++++ lapack-netlib/SRC/dgetf2.c | 620 +++++ lapack-netlib/SRC/dgetrf.c | 645 +++++ lapack-netlib/SRC/dgetrf2.c | 683 ++++++ lapack-netlib/SRC/dgetri.c | 694 ++++++ lapack-netlib/SRC/dgetrs.c | 620 +++++ lapack-netlib/SRC/dgetsls.c | 931 +++++++ 76 files changed, 72879 insertions(+) create mode 100644 lapack-netlib/SRC/dbbcsd.c create mode 100644 lapack-netlib/SRC/dbdsdc.c create mode 100644 lapack-netlib/SRC/dbdsqr.c create mode 100644 lapack-netlib/SRC/dbdsvdx.c create mode 100644 lapack-netlib/SRC/dcombssq.c create mode 100644 lapack-netlib/SRC/ddisna.c create mode 100644 lapack-netlib/SRC/dgbbrd.c create mode 100644 lapack-netlib/SRC/dgbcon.c create mode 100644 lapack-netlib/SRC/dgbequ.c create mode 100644 lapack-netlib/SRC/dgbequb.c create mode 100644 lapack-netlib/SRC/dgbrfs.c create mode 100644 lapack-netlib/SRC/dgbrfsx.c create mode 100644 lapack-netlib/SRC/dgbsv.c create mode 100644 lapack-netlib/SRC/dgbsvx.c create mode 100644 lapack-netlib/SRC/dgbsvxx.c create mode 100644 lapack-netlib/SRC/dgbtf2.c create mode 100644 lapack-netlib/SRC/dgbtrf.c create mode 100644 lapack-netlib/SRC/dgbtrs.c create mode 100644 lapack-netlib/SRC/dgebak.c create mode 100644 lapack-netlib/SRC/dgebal.c create mode 100644 lapack-netlib/SRC/dgebd2.c create mode 100644 lapack-netlib/SRC/dgebrd.c create mode 100644 lapack-netlib/SRC/dgecon.c create mode 100644 lapack-netlib/SRC/dgeequ.c create mode 100644 lapack-netlib/SRC/dgeequb.c create mode 100644 lapack-netlib/SRC/dgees.c create mode 100644 lapack-netlib/SRC/dgeesx.c create mode 100644 lapack-netlib/SRC/dgeev.c create mode 100644 lapack-netlib/SRC/dgeevx.c create mode 100644 lapack-netlib/SRC/dgehd2.c create mode 100644 lapack-netlib/SRC/dgehrd.c create mode 100644 lapack-netlib/SRC/dgejsv.c create mode 100644 lapack-netlib/SRC/dgelq.c create mode 100644 lapack-netlib/SRC/dgelq2.c create mode 100644 lapack-netlib/SRC/dgelqf.c create mode 100644 lapack-netlib/SRC/dgelqt.c create mode 100644 lapack-netlib/SRC/dgelqt3.c create mode 100644 lapack-netlib/SRC/dgels.c create mode 100644 lapack-netlib/SRC/dgelsd.c create mode 100644 lapack-netlib/SRC/dgelss.c create mode 100644 lapack-netlib/SRC/dgelsy.c create mode 100644 lapack-netlib/SRC/dgemlq.c create mode 100644 lapack-netlib/SRC/dgemlqt.c create mode 100644 lapack-netlib/SRC/dgemqr.c create mode 100644 lapack-netlib/SRC/dgemqrt.c create mode 100644 lapack-netlib/SRC/dgeql2.c create mode 100644 lapack-netlib/SRC/dgeqlf.c create mode 100644 lapack-netlib/SRC/dgeqp3.c create mode 100644 lapack-netlib/SRC/dgeqr.c create mode 100644 lapack-netlib/SRC/dgeqr2.c create mode 100644 lapack-netlib/SRC/dgeqr2p.c create mode 100644 lapack-netlib/SRC/dgeqrf.c create mode 100644 lapack-netlib/SRC/dgeqrfp.c create mode 100644 lapack-netlib/SRC/dgeqrt.c create mode 100644 lapack-netlib/SRC/dgeqrt2.c create mode 100644 lapack-netlib/SRC/dgeqrt3.c create mode 100644 lapack-netlib/SRC/dgerfs.c create mode 100644 lapack-netlib/SRC/dgerfsx.c create mode 100644 lapack-netlib/SRC/dgerq2.c create mode 100644 lapack-netlib/SRC/dgerqf.c create mode 100644 lapack-netlib/SRC/dgesc2.c create mode 100644 lapack-netlib/SRC/dgesdd.c create mode 100644 lapack-netlib/SRC/dgesv.c create mode 100644 lapack-netlib/SRC/dgesvd.c create mode 100644 lapack-netlib/SRC/dgesvdq.c create mode 100644 lapack-netlib/SRC/dgesvdx.c create mode 100644 lapack-netlib/SRC/dgesvj.c create mode 100644 lapack-netlib/SRC/dgesvx.c create mode 100644 lapack-netlib/SRC/dgesvxx.c create mode 100644 lapack-netlib/SRC/dgetc2.c create mode 100644 lapack-netlib/SRC/dgetf2.c create mode 100644 lapack-netlib/SRC/dgetrf.c create mode 100644 lapack-netlib/SRC/dgetrf2.c create mode 100644 lapack-netlib/SRC/dgetri.c create mode 100644 lapack-netlib/SRC/dgetrs.c create mode 100644 lapack-netlib/SRC/dgetsls.c diff --git a/lapack-netlib/SRC/dbbcsd.c b/lapack-netlib/SRC/dbbcsd.c new file mode 100644 index 000000000..7524cf43e --- /dev/null +++ b/lapack-netlib/SRC/dbbcsd.c @@ -0,0 +1,1676 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DBBCSD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DBBCSD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, */ +/* THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T, */ +/* V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E, */ +/* B22D, B22E, WORK, LWORK, INFO ) */ + +/* CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS */ +/* INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LWORK, M, P, Q */ +/* DOUBLE PRECISION B11D( * ), B11E( * ), B12D( * ), B12E( * ), */ +/* $ B21D( * ), B21E( * ), B22D( * ), B22E( * ), */ +/* $ PHI( * ), THETA( * ), WORK( * ) */ +/* DOUBLE PRECISION U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), */ +/* $ V2T( LDV2T, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DBBCSD computes the CS decomposition of an orthogonal matrix in */ +/* > bidiagonal-block form, */ +/* > */ +/* > */ +/* > [ B11 | B12 0 0 ] */ +/* > [ 0 | 0 -I 0 ] */ +/* > X = [----------------] */ +/* > [ B21 | B22 0 0 ] */ +/* > [ 0 | 0 0 I ] */ +/* > */ +/* > [ C | -S 0 0 ] */ +/* > [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T */ +/* > = [---------] [---------------] [---------] . */ +/* > [ | U2 ] [ S | C 0 0 ] [ | V2 ] */ +/* > [ 0 | 0 0 I ] */ +/* > */ +/* > X is M-by-M, its top-left block is P-by-Q, and Q must be no larger */ +/* > than P, M-P, or M-Q. (If Q is not the smallest index, then X must be */ +/* > transposed and/or permuted. This can be done in constant time using */ +/* > the TRANS and SIGNS options. See DORCSD for details.) */ +/* > */ +/* > The bidiagonal matrices B11, B12, B21, and B22 are represented */ +/* > implicitly by angles THETA(1:Q) and PHI(1:Q-1). */ +/* > */ +/* > The orthogonal matrices U1, U2, V1T, and V2T are input/output. */ +/* > The input matrices are pre- or post-multiplied by the appropriate */ +/* > singular vector matrices. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBU1 */ +/* > \verbatim */ +/* > JOBU1 is CHARACTER */ +/* > = 'Y': U1 is updated; */ +/* > otherwise: U1 is not updated. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBU2 */ +/* > \verbatim */ +/* > JOBU2 is CHARACTER */ +/* > = 'Y': U2 is updated; */ +/* > otherwise: U2 is not updated. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV1T */ +/* > \verbatim */ +/* > JOBV1T is CHARACTER */ +/* > = 'Y': V1T is updated; */ +/* > otherwise: V1T is not updated. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV2T */ +/* > \verbatim */ +/* > JOBV2T is CHARACTER */ +/* > = 'Y': V2T is updated; */ +/* > otherwise: V2T is not updated. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER */ +/* > = 'T': X, U1, U2, V1T, and V2T are stored in row-major */ +/* > order; */ +/* > otherwise: X, U1, U2, V1T, and V2T are stored in column- */ +/* > major order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows and columns in X, the orthogonal matrix in */ +/* > bidiagonal-block form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of rows in the top-left block of X. 0 <= P <= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Q */ +/* > \verbatim */ +/* > Q is INTEGER */ +/* > The number of columns in the top-left block of X. */ +/* > 0 <= Q <= MIN(P,M-P,M-Q). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] THETA */ +/* > \verbatim */ +/* > THETA is DOUBLE PRECISION array, dimension (Q) */ +/* > On entry, the angles THETA(1),...,THETA(Q) that, along with */ +/* > PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block */ +/* > form. On exit, the angles whose cosines and sines define the */ +/* > diagonal blocks in the CS decomposition. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] PHI */ +/* > \verbatim */ +/* > PHI is DOUBLE PRECISION array, dimension (Q-1) */ +/* > The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),..., */ +/* > THETA(Q), define the matrix in bidiagonal-block form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] U1 */ +/* > \verbatim */ +/* > U1 is DOUBLE PRECISION array, dimension (LDU1,P) */ +/* > On entry, a P-by-P matrix. On exit, U1 is postmultiplied */ +/* > by the left singular vector matrix common to [ B11 ; 0 ] and */ +/* > [ B12 0 0 ; 0 -I 0 0 ]. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU1 */ +/* > \verbatim */ +/* > LDU1 is INTEGER */ +/* > The leading dimension of the array U1, LDU1 >= MAX(1,P). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] U2 */ +/* > \verbatim */ +/* > U2 is DOUBLE PRECISION array, dimension (LDU2,M-P) */ +/* > On entry, an (M-P)-by-(M-P) matrix. On exit, U2 is */ +/* > postmultiplied by the left singular vector matrix common to */ +/* > [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ]. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU2 */ +/* > \verbatim */ +/* > LDU2 is INTEGER */ +/* > The leading dimension of the array U2, LDU2 >= MAX(1,M-P). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V1T */ +/* > \verbatim */ +/* > V1T is DOUBLE PRECISION array, dimension (LDV1T,Q) */ +/* > On entry, a Q-by-Q matrix. On exit, V1T is premultiplied */ +/* > by the transpose of the right singular vector */ +/* > matrix common to [ B11 ; 0 ] and [ B21 ; 0 ]. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV1T */ +/* > \verbatim */ +/* > LDV1T is INTEGER */ +/* > The leading dimension of the array V1T, LDV1T >= MAX(1,Q). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V2T */ +/* > \verbatim */ +/* > V2T is DOUBLE PRECISION array, dimension (LDV2T,M-Q) */ +/* > On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is */ +/* > premultiplied by the transpose of the right */ +/* > singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and */ +/* > [ B22 0 0 ; 0 0 I ]. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV2T */ +/* > \verbatim */ +/* > LDV2T is INTEGER */ +/* > The leading dimension of the array V2T, LDV2T >= MAX(1,M-Q). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] B11D */ +/* > \verbatim */ +/* > B11D is DOUBLE PRECISION array, dimension (Q) */ +/* > When DBBCSD converges, B11D contains the cosines of THETA(1), */ +/* > ..., THETA(Q). If DBBCSD fails to converge, then B11D */ +/* > contains the diagonal of the partially reduced top-left */ +/* > block. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] B11E */ +/* > \verbatim */ +/* > B11E is DOUBLE PRECISION array, dimension (Q-1) */ +/* > When DBBCSD converges, B11E contains zeros. If DBBCSD fails */ +/* > to converge, then B11E contains the superdiagonal of the */ +/* > partially reduced top-left block. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] B12D */ +/* > \verbatim */ +/* > B12D is DOUBLE PRECISION array, dimension (Q) */ +/* > When DBBCSD converges, B12D contains the negative sines of */ +/* > THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then */ +/* > B12D contains the diagonal of the partially reduced top-right */ +/* > block. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] B12E */ +/* > \verbatim */ +/* > B12E is DOUBLE PRECISION array, dimension (Q-1) */ +/* > When DBBCSD converges, B12E contains zeros. If DBBCSD fails */ +/* > to converge, then B12E contains the subdiagonal of the */ +/* > partially reduced top-right block. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] B21D */ +/* > \verbatim */ +/* > B21D is DOUBLE PRECISION array, dimension (Q) */ +/* > When DBBCSD converges, B21D contains the negative sines of */ +/* > THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then */ +/* > B21D contains the diagonal of the partially reduced bottom-left */ +/* > block. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] B21E */ +/* > \verbatim */ +/* > B21E is DOUBLE PRECISION array, dimension (Q-1) */ +/* > When DBBCSD converges, B21E contains zeros. If DBBCSD fails */ +/* > to converge, then B21E contains the subdiagonal of the */ +/* > partially reduced bottom-left block. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] B22D */ +/* > \verbatim */ +/* > B22D is DOUBLE PRECISION array, dimension (Q) */ +/* > When DBBCSD converges, B22D contains the negative sines of */ +/* > THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then */ +/* > B22D contains the diagonal of the partially reduced bottom-right */ +/* > block. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] B22E */ +/* > \verbatim */ +/* > B22E is DOUBLE PRECISION array, dimension (Q-1) */ +/* > When DBBCSD converges, B22E contains zeros. If DBBCSD fails */ +/* > to converge, then B22E contains the subdiagonal of the */ +/* > partially reduced bottom-right block. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= MAX(1,8*Q). */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal size of the WORK array, */ +/* > returns this value as the first entry of the work array, and */ +/* > no error message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if DBBCSD did not converge, INFO specifies the number */ +/* > of nonzero entries in PHI, and B11D, B11E, etc., */ +/* > contain the partially reduced matrix. */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > TOLMUL DOUBLE PRECISION, default = MAX(10,MIN(100,EPS**(-1/8))) */ +/* > TOLMUL controls the convergence criterion of the QR loop. */ +/* > Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they */ +/* > are within TOLMUL*EPS of either bound. */ +/* > \endverbatim */ + +/* > \par References: */ +/* ================ */ +/* > */ +/* > [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. */ +/* > Algorithms, 50(1):33-65, 2009. */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dbbcsd_(char *jobu1, char *jobu2, char *jobv1t, char * + jobv2t, char *trans, integer *m, integer *p, integer *q, doublereal * + theta, doublereal *phi, doublereal *u1, integer *ldu1, doublereal *u2, + integer *ldu2, doublereal *v1t, integer *ldv1t, doublereal *v2t, + integer *ldv2t, doublereal *b11d, doublereal *b11e, doublereal *b12d, + doublereal *b12e, doublereal *b21d, doublereal *b21e, doublereal * + b22d, doublereal *b22e, doublereal *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + integer u1_dim1, u1_offset, u2_dim1, u2_offset, v1t_dim1, v1t_offset, + v2t_dim1, v2t_offset, i__1, i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + integer imin, mini, imax, iter; + doublereal unfl, temp; + logical colmajor; + doublereal thetamin, thetamax; + logical restart11, restart12, restart21, restart22; + extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + integer lworkmin, iu1cs, iu2cs, iu1sn, iu2sn, lworkopt, i__, j; + doublereal r__; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer * + , doublereal *, integer *); + integer maxit; + doublereal dummy, x1, x2, y1, y2; + integer iv1tcs, iv2tcs; + logical wantu1, wantu2; + integer iv1tsn, iv2tsn; + extern doublereal dlamch_(char *); + doublereal mu, nu, sigma11, sigma21; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal thresh, tolmul; + extern /* Subroutine */ int mecago_(); + logical lquery; + doublereal b11bulge; + logical wantv1t, wantv2t; + doublereal b12bulge, b21bulge, b22bulge, eps, tol; + extern /* Subroutine */ int dlartgp_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), dlartgs_(doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *); + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* =================================================================== */ + + + +/* Test input arguments */ + + /* Parameter adjustments */ + --theta; + --phi; + u1_dim1 = *ldu1; + u1_offset = 1 + u1_dim1 * 1; + u1 -= u1_offset; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1 * 1; + u2 -= u2_offset; + v1t_dim1 = *ldv1t; + v1t_offset = 1 + v1t_dim1 * 1; + v1t -= v1t_offset; + v2t_dim1 = *ldv2t; + v2t_offset = 1 + v2t_dim1 * 1; + v2t -= v2t_offset; + --b11d; + --b11e; + --b12d; + --b12e; + --b21d; + --b21e; + --b22d; + --b22e; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantu1 = lsame_(jobu1, "Y"); + wantu2 = lsame_(jobu2, "Y"); + wantv1t = lsame_(jobv1t, "Y"); + wantv2t = lsame_(jobv2t, "Y"); + colmajor = ! lsame_(trans, "T"); + + if (*m < 0) { + *info = -6; + } else if (*p < 0 || *p > *m) { + *info = -7; + } else if (*q < 0 || *q > *m) { + *info = -8; + } else if (*q > *p || *q > *m - *p || *q > *m - *q) { + *info = -8; + } else if (wantu1 && *ldu1 < *p) { + *info = -12; + } else if (wantu2 && *ldu2 < *m - *p) { + *info = -14; + } else if (wantv1t && *ldv1t < *q) { + *info = -16; + } else if (wantv2t && *ldv2t < *m - *q) { + *info = -18; + } + +/* Quick return if Q = 0 */ + + if (*info == 0 && *q == 0) { + lworkmin = 1; + work[1] = (doublereal) lworkmin; + return 0; + } + +/* Compute workspace */ + + if (*info == 0) { + iu1cs = 1; + iu1sn = iu1cs + *q; + iu2cs = iu1sn + *q; + iu2sn = iu2cs + *q; + iv1tcs = iu2sn + *q; + iv1tsn = iv1tcs + *q; + iv2tcs = iv1tsn + *q; + iv2tsn = iv2tcs + *q; + lworkopt = iv2tsn + *q - 1; + lworkmin = lworkopt; + work[1] = (doublereal) lworkopt; + if (*lwork < lworkmin && ! lquery) { + *info = -28; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DBBCSD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("Epsilon"); + unfl = dlamch_("Safe minimum"); +/* Computing MAX */ +/* Computing MIN */ + d__3 = 100., d__4 = pow_dd(&eps, &c_b10); + d__1 = 10., d__2 = f2cmin(d__3,d__4); + tolmul = f2cmax(d__1,d__2); + tol = tolmul * eps; +/* Computing MAX */ + d__1 = tol, d__2 = *q * 6 * *q * unfl; + thresh = f2cmax(d__1,d__2); + +/* Test for negligible sines or cosines */ + + i__1 = *q; + for (i__ = 1; i__ <= i__1; ++i__) { + if (theta[i__] < thresh) { + theta[i__] = 0.; + } else if (theta[i__] > 1.57079632679489662 - thresh) { + theta[i__] = 1.57079632679489662; + } + } + i__1 = *q - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (phi[i__] < thresh) { + phi[i__] = 0.; + } else if (phi[i__] > 1.57079632679489662 - thresh) { + phi[i__] = 1.57079632679489662; + } + } + +/* Initial deflation */ + + imax = *q; + while(imax > 1) { + if (phi[imax - 1] != 0.) { + myexit_(); + } + --imax; + } + imin = imax - 1; + if (imin > 1) { + while(phi[imin - 1] != 0.) { + --imin; + if (imin <= 1) { + myexit_(); + } + } + } + +/* Initialize iteration counter */ + + maxit = *q * 6 * *q; + iter = 0; + +/* Begin main iteration loop */ + + while(imax > 1) { + +/* Compute the matrix entries */ + + b11d[imin] = cos(theta[imin]); + b21d[imin] = -sin(theta[imin]); + i__1 = imax - 1; + for (i__ = imin; i__ <= i__1; ++i__) { + b11e[i__] = -sin(theta[i__]) * sin(phi[i__]); + b11d[i__ + 1] = cos(theta[i__ + 1]) * cos(phi[i__]); + b12d[i__] = sin(theta[i__]) * cos(phi[i__]); + b12e[i__] = cos(theta[i__ + 1]) * sin(phi[i__]); + b21e[i__] = -cos(theta[i__]) * sin(phi[i__]); + b21d[i__ + 1] = -sin(theta[i__ + 1]) * cos(phi[i__]); + b22d[i__] = cos(theta[i__]) * cos(phi[i__]); + b22e[i__] = -sin(theta[i__ + 1]) * sin(phi[i__]); + } + b12d[imax] = sin(theta[imax]); + b22d[imax] = cos(theta[imax]); + +/* Abort if not converging; otherwise, increment ITER */ + + if (iter > maxit) { + *info = 0; + i__1 = *q; + for (i__ = 1; i__ <= i__1; ++i__) { + if (phi[i__] != 0.) { + ++(*info); + } + } + return 0; + } + + iter = iter + imax - imin; + +/* Compute shifts */ + + thetamax = theta[imin]; + thetamin = theta[imin]; + i__1 = imax; + for (i__ = imin + 1; i__ <= i__1; ++i__) { + if (theta[i__] > thetamax) { + thetamax = theta[i__]; + } + if (theta[i__] < thetamin) { + thetamin = theta[i__]; + } + } + + if (thetamax > 1.57079632679489662 - thresh) { + +/* Zero on diagonals of B11 and B22; induce deflation with a */ +/* zero shift */ + + mu = 0.; + nu = 1.; + + } else if (thetamin < thresh) { + +/* Zero on diagonals of B12 and B22; induce deflation with a */ +/* zero shift */ + + mu = 1.; + nu = 0.; + + } else { + +/* Compute shifts for B11 and B21 and use the lesser */ + + dlas2_(&b11d[imax - 1], &b11e[imax - 1], &b11d[imax], &sigma11, & + dummy); + dlas2_(&b21d[imax - 1], &b21e[imax - 1], &b21d[imax], &sigma21, & + dummy); + + if (sigma11 <= sigma21) { + mu = sigma11; +/* Computing 2nd power */ + d__1 = mu; + nu = sqrt(1. - d__1 * d__1); + if (mu < thresh) { + mu = 0.; + nu = 1.; + } + } else { + nu = sigma21; +/* Computing 2nd power */ + d__1 = nu; + mu = sqrt(1.f - d__1 * d__1); + if (nu < thresh) { + mu = 1.; + nu = 0.; + } + } + } + +/* Rotate to produce bulges in B11 and B21 */ + + if (mu <= nu) { + dlartgs_(&b11d[imin], &b11e[imin], &mu, &work[iv1tcs + imin - 1], + &work[iv1tsn + imin - 1]); + } else { + dlartgs_(&b21d[imin], &b21e[imin], &nu, &work[iv1tcs + imin - 1], + &work[iv1tsn + imin - 1]); + } + + temp = work[iv1tcs + imin - 1] * b11d[imin] + work[iv1tsn + imin - 1] + * b11e[imin]; + b11e[imin] = work[iv1tcs + imin - 1] * b11e[imin] - work[iv1tsn + + imin - 1] * b11d[imin]; + b11d[imin] = temp; + b11bulge = work[iv1tsn + imin - 1] * b11d[imin + 1]; + b11d[imin + 1] = work[iv1tcs + imin - 1] * b11d[imin + 1]; + temp = work[iv1tcs + imin - 1] * b21d[imin] + work[iv1tsn + imin - 1] + * b21e[imin]; + b21e[imin] = work[iv1tcs + imin - 1] * b21e[imin] - work[iv1tsn + + imin - 1] * b21d[imin]; + b21d[imin] = temp; + b21bulge = work[iv1tsn + imin - 1] * b21d[imin + 1]; + b21d[imin + 1] = work[iv1tcs + imin - 1] * b21d[imin + 1]; + +/* Compute THETA(IMIN) */ + +/* Computing 2nd power */ + d__1 = b21d[imin]; +/* Computing 2nd power */ + d__2 = b21bulge; +/* Computing 2nd power */ + d__3 = b11d[imin]; +/* Computing 2nd power */ + d__4 = b11bulge; + theta[imin] = atan2(sqrt(d__1 * d__1 + d__2 * d__2), sqrt(d__3 * d__3 + + d__4 * d__4)); + +/* Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN) */ + +/* Computing 2nd power */ + d__1 = b11d[imin]; +/* Computing 2nd power */ + d__2 = b11bulge; +/* Computing 2nd power */ + d__3 = thresh; + if (d__1 * d__1 + d__2 * d__2 > d__3 * d__3) { + dlartgp_(&b11bulge, &b11d[imin], &work[iu1sn + imin - 1], &work[ + iu1cs + imin - 1], &r__); + } else if (mu <= nu) { + dlartgs_(&b11e[imin], &b11d[imin + 1], &mu, &work[iu1cs + imin - + 1], &work[iu1sn + imin - 1]); + } else { + dlartgs_(&b12d[imin], &b12e[imin], &nu, &work[iu1cs + imin - 1], & + work[iu1sn + imin - 1]); + } +/* Computing 2nd power */ + d__1 = b21d[imin]; +/* Computing 2nd power */ + d__2 = b21bulge; +/* Computing 2nd power */ + d__3 = thresh; + if (d__1 * d__1 + d__2 * d__2 > d__3 * d__3) { + dlartgp_(&b21bulge, &b21d[imin], &work[iu2sn + imin - 1], &work[ + iu2cs + imin - 1], &r__); + } else if (nu < mu) { + dlartgs_(&b21e[imin], &b21d[imin + 1], &nu, &work[iu2cs + imin - + 1], &work[iu2sn + imin - 1]); + } else { + dlartgs_(&b22d[imin], &b22e[imin], &mu, &work[iu2cs + imin - 1], & + work[iu2sn + imin - 1]); + } + work[iu2cs + imin - 1] = -work[iu2cs + imin - 1]; + work[iu2sn + imin - 1] = -work[iu2sn + imin - 1]; + + temp = work[iu1cs + imin - 1] * b11e[imin] + work[iu1sn + imin - 1] * + b11d[imin + 1]; + b11d[imin + 1] = work[iu1cs + imin - 1] * b11d[imin + 1] - work[iu1sn + + imin - 1] * b11e[imin]; + b11e[imin] = temp; + if (imax > imin + 1) { + b11bulge = work[iu1sn + imin - 1] * b11e[imin + 1]; + b11e[imin + 1] = work[iu1cs + imin - 1] * b11e[imin + 1]; + } + temp = work[iu1cs + imin - 1] * b12d[imin] + work[iu1sn + imin - 1] * + b12e[imin]; + b12e[imin] = work[iu1cs + imin - 1] * b12e[imin] - work[iu1sn + imin + - 1] * b12d[imin]; + b12d[imin] = temp; + b12bulge = work[iu1sn + imin - 1] * b12d[imin + 1]; + b12d[imin + 1] = work[iu1cs + imin - 1] * b12d[imin + 1]; + temp = work[iu2cs + imin - 1] * b21e[imin] + work[iu2sn + imin - 1] * + b21d[imin + 1]; + b21d[imin + 1] = work[iu2cs + imin - 1] * b21d[imin + 1] - work[iu2sn + + imin - 1] * b21e[imin]; + b21e[imin] = temp; + if (imax > imin + 1) { + b21bulge = work[iu2sn + imin - 1] * b21e[imin + 1]; + b21e[imin + 1] = work[iu2cs + imin - 1] * b21e[imin + 1]; + } + temp = work[iu2cs + imin - 1] * b22d[imin] + work[iu2sn + imin - 1] * + b22e[imin]; + b22e[imin] = work[iu2cs + imin - 1] * b22e[imin] - work[iu2sn + imin + - 1] * b22d[imin]; + b22d[imin] = temp; + b22bulge = work[iu2sn + imin - 1] * b22d[imin + 1]; + b22d[imin + 1] = work[iu2cs + imin - 1] * b22d[imin + 1]; + +/* Inner loop: chase bulges from B11(IMIN,IMIN+2), */ +/* B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to */ +/* bottom-right */ + + i__1 = imax - 1; + for (i__ = imin + 1; i__ <= i__1; ++i__) { + +/* Compute PHI(I-1) */ + + x1 = sin(theta[i__ - 1]) * b11e[i__ - 1] + cos(theta[i__ - 1]) * + b21e[i__ - 1]; + x2 = sin(theta[i__ - 1]) * b11bulge + cos(theta[i__ - 1]) * + b21bulge; + y1 = sin(theta[i__ - 1]) * b12d[i__ - 1] + cos(theta[i__ - 1]) * + b22d[i__ - 1]; + y2 = sin(theta[i__ - 1]) * b12bulge + cos(theta[i__ - 1]) * + b22bulge; + +/* Computing 2nd power */ + d__1 = x1; +/* Computing 2nd power */ + d__2 = x2; +/* Computing 2nd power */ + d__3 = y1; +/* Computing 2nd power */ + d__4 = y2; + phi[i__ - 1] = atan2(sqrt(d__1 * d__1 + d__2 * d__2), sqrt(d__3 * + d__3 + d__4 * d__4)); + +/* Determine if there are bulges to chase or if a new direct */ +/* summand has been reached */ + +/* Computing 2nd power */ + d__1 = b11e[i__ - 1]; +/* Computing 2nd power */ + d__2 = b11bulge; +/* Computing 2nd power */ + d__3 = thresh; + restart11 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3; +/* Computing 2nd power */ + d__1 = b21e[i__ - 1]; +/* Computing 2nd power */ + d__2 = b21bulge; +/* Computing 2nd power */ + d__3 = thresh; + restart21 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3; +/* Computing 2nd power */ + d__1 = b12d[i__ - 1]; +/* Computing 2nd power */ + d__2 = b12bulge; +/* Computing 2nd power */ + d__3 = thresh; + restart12 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3; +/* Computing 2nd power */ + d__1 = b22d[i__ - 1]; +/* Computing 2nd power */ + d__2 = b22bulge; +/* Computing 2nd power */ + d__3 = thresh; + restart22 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3; + +/* If possible, chase bulges from B11(I-1,I+1), B12(I-1,I), */ +/* B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge- */ +/* chasing by applying the original shift again. */ + + if (! restart11 && ! restart21) { + dlartgp_(&x2, &x1, &work[iv1tsn + i__ - 1], &work[iv1tcs + + i__ - 1], &r__); + } else if (! restart11 && restart21) { + dlartgp_(&b11bulge, &b11e[i__ - 1], &work[iv1tsn + i__ - 1], & + work[iv1tcs + i__ - 1], &r__); + } else if (restart11 && ! restart21) { + dlartgp_(&b21bulge, &b21e[i__ - 1], &work[iv1tsn + i__ - 1], & + work[iv1tcs + i__ - 1], &r__); + } else if (mu <= nu) { + dlartgs_(&b11d[i__], &b11e[i__], &mu, &work[iv1tcs + i__ - 1], + &work[iv1tsn + i__ - 1]); + } else { + dlartgs_(&b21d[i__], &b21e[i__], &nu, &work[iv1tcs + i__ - 1], + &work[iv1tsn + i__ - 1]); + } + work[iv1tcs + i__ - 1] = -work[iv1tcs + i__ - 1]; + work[iv1tsn + i__ - 1] = -work[iv1tsn + i__ - 1]; + if (! restart12 && ! restart22) { + dlartgp_(&y2, &y1, &work[iv2tsn + i__ - 2], &work[iv2tcs + + i__ - 2], &r__); + } else if (! restart12 && restart22) { + dlartgp_(&b12bulge, &b12d[i__ - 1], &work[iv2tsn + i__ - 2], & + work[iv2tcs + i__ - 2], &r__); + } else if (restart12 && ! restart22) { + dlartgp_(&b22bulge, &b22d[i__ - 1], &work[iv2tsn + i__ - 2], & + work[iv2tcs + i__ - 2], &r__); + } else if (nu < mu) { + dlartgs_(&b12e[i__ - 1], &b12d[i__], &nu, &work[iv2tcs + i__ + - 2], &work[iv2tsn + i__ - 2]); + } else { + dlartgs_(&b22e[i__ - 1], &b22d[i__], &mu, &work[iv2tcs + i__ + - 2], &work[iv2tsn + i__ - 2]); + } + + temp = work[iv1tcs + i__ - 1] * b11d[i__] + work[iv1tsn + i__ - 1] + * b11e[i__]; + b11e[i__] = work[iv1tcs + i__ - 1] * b11e[i__] - work[iv1tsn + + i__ - 1] * b11d[i__]; + b11d[i__] = temp; + b11bulge = work[iv1tsn + i__ - 1] * b11d[i__ + 1]; + b11d[i__ + 1] = work[iv1tcs + i__ - 1] * b11d[i__ + 1]; + temp = work[iv1tcs + i__ - 1] * b21d[i__] + work[iv1tsn + i__ - 1] + * b21e[i__]; + b21e[i__] = work[iv1tcs + i__ - 1] * b21e[i__] - work[iv1tsn + + i__ - 1] * b21d[i__]; + b21d[i__] = temp; + b21bulge = work[iv1tsn + i__ - 1] * b21d[i__ + 1]; + b21d[i__ + 1] = work[iv1tcs + i__ - 1] * b21d[i__ + 1]; + temp = work[iv2tcs + i__ - 2] * b12e[i__ - 1] + work[iv2tsn + i__ + - 2] * b12d[i__]; + b12d[i__] = work[iv2tcs + i__ - 2] * b12d[i__] - work[iv2tsn + + i__ - 2] * b12e[i__ - 1]; + b12e[i__ - 1] = temp; + b12bulge = work[iv2tsn + i__ - 2] * b12e[i__]; + b12e[i__] = work[iv2tcs + i__ - 2] * b12e[i__]; + temp = work[iv2tcs + i__ - 2] * b22e[i__ - 1] + work[iv2tsn + i__ + - 2] * b22d[i__]; + b22d[i__] = work[iv2tcs + i__ - 2] * b22d[i__] - work[iv2tsn + + i__ - 2] * b22e[i__ - 1]; + b22e[i__ - 1] = temp; + b22bulge = work[iv2tsn + i__ - 2] * b22e[i__]; + b22e[i__] = work[iv2tcs + i__ - 2] * b22e[i__]; + +/* Compute THETA(I) */ + + x1 = cos(phi[i__ - 1]) * b11d[i__] + sin(phi[i__ - 1]) * b12e[i__ + - 1]; + x2 = cos(phi[i__ - 1]) * b11bulge + sin(phi[i__ - 1]) * b12bulge; + y1 = cos(phi[i__ - 1]) * b21d[i__] + sin(phi[i__ - 1]) * b22e[i__ + - 1]; + y2 = cos(phi[i__ - 1]) * b21bulge + sin(phi[i__ - 1]) * b22bulge; + +/* Computing 2nd power */ + d__1 = y1; +/* Computing 2nd power */ + d__2 = y2; +/* Computing 2nd power */ + d__3 = x1; +/* Computing 2nd power */ + d__4 = x2; + theta[i__] = atan2(sqrt(d__1 * d__1 + d__2 * d__2), sqrt(d__3 * + d__3 + d__4 * d__4)); + +/* Determine if there are bulges to chase or if a new direct */ +/* summand has been reached */ + +/* Computing 2nd power */ + d__1 = b11d[i__]; +/* Computing 2nd power */ + d__2 = b11bulge; +/* Computing 2nd power */ + d__3 = thresh; + restart11 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3; +/* Computing 2nd power */ + d__1 = b12e[i__ - 1]; +/* Computing 2nd power */ + d__2 = b12bulge; +/* Computing 2nd power */ + d__3 = thresh; + restart12 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3; +/* Computing 2nd power */ + d__1 = b21d[i__]; +/* Computing 2nd power */ + d__2 = b21bulge; +/* Computing 2nd power */ + d__3 = thresh; + restart21 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3; +/* Computing 2nd power */ + d__1 = b22e[i__ - 1]; +/* Computing 2nd power */ + d__2 = b22bulge; +/* Computing 2nd power */ + d__3 = thresh; + restart22 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3; + +/* If possible, chase bulges from B11(I+1,I), B12(I+1,I-1), */ +/* B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge- */ +/* chasing by applying the original shift again. */ + + if (! restart11 && ! restart12) { + dlartgp_(&x2, &x1, &work[iu1sn + i__ - 1], &work[iu1cs + i__ + - 1], &r__); + } else if (! restart11 && restart12) { + dlartgp_(&b11bulge, &b11d[i__], &work[iu1sn + i__ - 1], &work[ + iu1cs + i__ - 1], &r__); + } else if (restart11 && ! restart12) { + dlartgp_(&b12bulge, &b12e[i__ - 1], &work[iu1sn + i__ - 1], & + work[iu1cs + i__ - 1], &r__); + } else if (mu <= nu) { + dlartgs_(&b11e[i__], &b11d[i__ + 1], &mu, &work[iu1cs + i__ - + 1], &work[iu1sn + i__ - 1]); + } else { + dlartgs_(&b12d[i__], &b12e[i__], &nu, &work[iu1cs + i__ - 1], + &work[iu1sn + i__ - 1]); + } + if (! restart21 && ! restart22) { + dlartgp_(&y2, &y1, &work[iu2sn + i__ - 1], &work[iu2cs + i__ + - 1], &r__); + } else if (! restart21 && restart22) { + dlartgp_(&b21bulge, &b21d[i__], &work[iu2sn + i__ - 1], &work[ + iu2cs + i__ - 1], &r__); + } else if (restart21 && ! restart22) { + dlartgp_(&b22bulge, &b22e[i__ - 1], &work[iu2sn + i__ - 1], & + work[iu2cs + i__ - 1], &r__); + } else if (nu < mu) { + dlartgs_(&b21e[i__], &b21e[i__ + 1], &nu, &work[iu2cs + i__ - + 1], &work[iu2sn + i__ - 1]); + } else { + dlartgs_(&b22d[i__], &b22e[i__], &mu, &work[iu2cs + i__ - 1], + &work[iu2sn + i__ - 1]); + } + work[iu2cs + i__ - 1] = -work[iu2cs + i__ - 1]; + work[iu2sn + i__ - 1] = -work[iu2sn + i__ - 1]; + + temp = work[iu1cs + i__ - 1] * b11e[i__] + work[iu1sn + i__ - 1] * + b11d[i__ + 1]; + b11d[i__ + 1] = work[iu1cs + i__ - 1] * b11d[i__ + 1] - work[ + iu1sn + i__ - 1] * b11e[i__]; + b11e[i__] = temp; + if (i__ < imax - 1) { + b11bulge = work[iu1sn + i__ - 1] * b11e[i__ + 1]; + b11e[i__ + 1] = work[iu1cs + i__ - 1] * b11e[i__ + 1]; + } + temp = work[iu2cs + i__ - 1] * b21e[i__] + work[iu2sn + i__ - 1] * + b21d[i__ + 1]; + b21d[i__ + 1] = work[iu2cs + i__ - 1] * b21d[i__ + 1] - work[ + iu2sn + i__ - 1] * b21e[i__]; + b21e[i__] = temp; + if (i__ < imax - 1) { + b21bulge = work[iu2sn + i__ - 1] * b21e[i__ + 1]; + b21e[i__ + 1] = work[iu2cs + i__ - 1] * b21e[i__ + 1]; + } + temp = work[iu1cs + i__ - 1] * b12d[i__] + work[iu1sn + i__ - 1] * + b12e[i__]; + b12e[i__] = work[iu1cs + i__ - 1] * b12e[i__] - work[iu1sn + i__ + - 1] * b12d[i__]; + b12d[i__] = temp; + b12bulge = work[iu1sn + i__ - 1] * b12d[i__ + 1]; + b12d[i__ + 1] = work[iu1cs + i__ - 1] * b12d[i__ + 1]; + temp = work[iu2cs + i__ - 1] * b22d[i__] + work[iu2sn + i__ - 1] * + b22e[i__]; + b22e[i__] = work[iu2cs + i__ - 1] * b22e[i__] - work[iu2sn + i__ + - 1] * b22d[i__]; + b22d[i__] = temp; + b22bulge = work[iu2sn + i__ - 1] * b22d[i__ + 1]; + b22d[i__ + 1] = work[iu2cs + i__ - 1] * b22d[i__ + 1]; + + } + +/* Compute PHI(IMAX-1) */ + + x1 = sin(theta[imax - 1]) * b11e[imax - 1] + cos(theta[imax - 1]) * + b21e[imax - 1]; + y1 = sin(theta[imax - 1]) * b12d[imax - 1] + cos(theta[imax - 1]) * + b22d[imax - 1]; + y2 = sin(theta[imax - 1]) * b12bulge + cos(theta[imax - 1]) * + b22bulge; + +/* Computing 2nd power */ + d__1 = y1; +/* Computing 2nd power */ + d__2 = y2; + phi[imax - 1] = atan2((abs(x1)), sqrt(d__1 * d__1 + d__2 * d__2)); + +/* Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX) */ + +/* Computing 2nd power */ + d__1 = b12d[imax - 1]; +/* Computing 2nd power */ + d__2 = b12bulge; +/* Computing 2nd power */ + d__3 = thresh; + restart12 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3; +/* Computing 2nd power */ + d__1 = b22d[imax - 1]; +/* Computing 2nd power */ + d__2 = b22bulge; +/* Computing 2nd power */ + d__3 = thresh; + restart22 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3; + + if (! restart12 && ! restart22) { + dlartgp_(&y2, &y1, &work[iv2tsn + imax - 2], &work[iv2tcs + imax + - 2], &r__); + } else if (! restart12 && restart22) { + dlartgp_(&b12bulge, &b12d[imax - 1], &work[iv2tsn + imax - 2], & + work[iv2tcs + imax - 2], &r__); + } else if (restart12 && ! restart22) { + dlartgp_(&b22bulge, &b22d[imax - 1], &work[iv2tsn + imax - 2], & + work[iv2tcs + imax - 2], &r__); + } else if (nu < mu) { + dlartgs_(&b12e[imax - 1], &b12d[imax], &nu, &work[iv2tcs + imax - + 2], &work[iv2tsn + imax - 2]); + } else { + dlartgs_(&b22e[imax - 1], &b22d[imax], &mu, &work[iv2tcs + imax - + 2], &work[iv2tsn + imax - 2]); + } + + temp = work[iv2tcs + imax - 2] * b12e[imax - 1] + work[iv2tsn + imax + - 2] * b12d[imax]; + b12d[imax] = work[iv2tcs + imax - 2] * b12d[imax] - work[iv2tsn + + imax - 2] * b12e[imax - 1]; + b12e[imax - 1] = temp; + temp = work[iv2tcs + imax - 2] * b22e[imax - 1] + work[iv2tsn + imax + - 2] * b22d[imax]; + b22d[imax] = work[iv2tcs + imax - 2] * b22d[imax] - work[iv2tsn + + imax - 2] * b22e[imax - 1]; + b22e[imax - 1] = temp; + +/* Update singular vectors */ + + if (wantu1) { + if (colmajor) { + i__1 = imax - imin + 1; + dlasr_("R", "V", "F", p, &i__1, &work[iu1cs + imin - 1], & + work[iu1sn + imin - 1], &u1[imin * u1_dim1 + 1], ldu1); + } else { + i__1 = imax - imin + 1; + dlasr_("L", "V", "F", &i__1, p, &work[iu1cs + imin - 1], & + work[iu1sn + imin - 1], &u1[imin + u1_dim1], ldu1); + } + } + if (wantu2) { + if (colmajor) { + i__1 = *m - *p; + i__2 = imax - imin + 1; + dlasr_("R", "V", "F", &i__1, &i__2, &work[iu2cs + imin - 1], & + work[iu2sn + imin - 1], &u2[imin * u2_dim1 + 1], ldu2); + } else { + i__1 = imax - imin + 1; + i__2 = *m - *p; + dlasr_("L", "V", "F", &i__1, &i__2, &work[iu2cs + imin - 1], & + work[iu2sn + imin - 1], &u2[imin + u2_dim1], ldu2); + } + } + if (wantv1t) { + if (colmajor) { + i__1 = imax - imin + 1; + dlasr_("L", "V", "F", &i__1, q, &work[iv1tcs + imin - 1], & + work[iv1tsn + imin - 1], &v1t[imin + v1t_dim1], ldv1t); + } else { + i__1 = imax - imin + 1; + dlasr_("R", "V", "F", q, &i__1, &work[iv1tcs + imin - 1], & + work[iv1tsn + imin - 1], &v1t[imin * v1t_dim1 + 1], + ldv1t); + } + } + if (wantv2t) { + if (colmajor) { + i__1 = imax - imin + 1; + i__2 = *m - *q; + dlasr_("L", "V", "F", &i__1, &i__2, &work[iv2tcs + imin - 1], + &work[iv2tsn + imin - 1], &v2t[imin + v2t_dim1], + ldv2t); + } else { + i__1 = *m - *q; + i__2 = imax - imin + 1; + dlasr_("R", "V", "F", &i__1, &i__2, &work[iv2tcs + imin - 1], + &work[iv2tsn + imin - 1], &v2t[imin * v2t_dim1 + 1], + ldv2t); + } + } + +/* Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX) */ + + if (b11e[imax - 1] + b21e[imax - 1] > 0.) { + b11d[imax] = -b11d[imax]; + b21d[imax] = -b21d[imax]; + if (wantv1t) { + if (colmajor) { + dscal_(q, &c_b35, &v1t[imax + v1t_dim1], ldv1t); + } else { + dscal_(q, &c_b35, &v1t[imax * v1t_dim1 + 1], &c__1); + } + } + } + +/* Compute THETA(IMAX) */ + + x1 = cos(phi[imax - 1]) * b11d[imax] + sin(phi[imax - 1]) * b12e[imax + - 1]; + y1 = cos(phi[imax - 1]) * b21d[imax] + sin(phi[imax - 1]) * b22e[imax + - 1]; + + theta[imax] = atan2((abs(y1)), (abs(x1))); + +/* Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX), */ +/* and B22(IMAX,IMAX-1) */ + + if (b11d[imax] + b12e[imax - 1] < 0.) { + b12d[imax] = -b12d[imax]; + if (wantu1) { + if (colmajor) { + dscal_(p, &c_b35, &u1[imax * u1_dim1 + 1], &c__1); + } else { + dscal_(p, &c_b35, &u1[imax + u1_dim1], ldu1); + } + } + } + if (b21d[imax] + b22e[imax - 1] > 0.) { + b22d[imax] = -b22d[imax]; + if (wantu2) { + if (colmajor) { + i__1 = *m - *p; + dscal_(&i__1, &c_b35, &u2[imax * u2_dim1 + 1], &c__1); + } else { + i__1 = *m - *p; + dscal_(&i__1, &c_b35, &u2[imax + u2_dim1], ldu2); + } + } + } + +/* Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX) */ + + if (b12d[imax] + b22d[imax] < 0.) { + if (wantv2t) { + if (colmajor) { + i__1 = *m - *q; + dscal_(&i__1, &c_b35, &v2t[imax + v2t_dim1], ldv2t); + } else { + i__1 = *m - *q; + dscal_(&i__1, &c_b35, &v2t[imax * v2t_dim1 + 1], &c__1); + } + } + } + +/* Test for negligible sines or cosines */ + + i__1 = imax; + for (i__ = imin; i__ <= i__1; ++i__) { + if (theta[i__] < thresh) { + theta[i__] = 0.; + } else if (theta[i__] > 1.57079632679489662 - thresh) { + theta[i__] = 1.57079632679489662; + } + } + i__1 = imax - 1; + for (i__ = imin; i__ <= i__1; ++i__) { + if (phi[i__] < thresh) { + phi[i__] = 0.; + } else if (phi[i__] > 1.57079632679489662 - thresh) { + phi[i__] = 1.57079632679489662; + } + } + +/* Deflate */ + + if (imax > 1) { + while(phi[imax - 1] == 0.) { + --imax; + if (imax <= 1) { + myexit_(); + } + } + } + if (imin > imax - 1) { + imin = imax - 1; + } + if (imin > 1) { + while(phi[imin - 1] != 0.) { + --imin; + if (imin <= 1) { + myexit_(); + } + } + } + +/* Repeat main iteration loop */ + + } + +/* Postprocessing: order THETA from least to greatest */ + + i__1 = *q; + for (i__ = 1; i__ <= i__1; ++i__) { + + mini = i__; + thetamin = theta[i__]; + i__2 = *q; + for (j = i__ + 1; j <= i__2; ++j) { + if (theta[j] < thetamin) { + mini = j; + thetamin = theta[j]; + } + } + + if (mini != i__) { + theta[mini] = theta[i__]; + theta[i__] = thetamin; + if (colmajor) { + if (wantu1) { + dswap_(p, &u1[i__ * u1_dim1 + 1], &c__1, &u1[mini * + u1_dim1 + 1], &c__1); + } + if (wantu2) { + i__2 = *m - *p; + dswap_(&i__2, &u2[i__ * u2_dim1 + 1], &c__1, &u2[mini * + u2_dim1 + 1], &c__1); + } + if (wantv1t) { + dswap_(q, &v1t[i__ + v1t_dim1], ldv1t, &v1t[mini + + v1t_dim1], ldv1t); + } + if (wantv2t) { + i__2 = *m - *q; + dswap_(&i__2, &v2t[i__ + v2t_dim1], ldv2t, &v2t[mini + + v2t_dim1], ldv2t); + } + } else { + if (wantu1) { + dswap_(p, &u1[i__ + u1_dim1], ldu1, &u1[mini + u1_dim1], + ldu1); + } + if (wantu2) { + i__2 = *m - *p; + dswap_(&i__2, &u2[i__ + u2_dim1], ldu2, &u2[mini + + u2_dim1], ldu2); + } + if (wantv1t) { + dswap_(q, &v1t[i__ * v1t_dim1 + 1], &c__1, &v1t[mini * + v1t_dim1 + 1], &c__1); + } + if (wantv2t) { + i__2 = *m - *q; + dswap_(&i__2, &v2t[i__ * v2t_dim1 + 1], &c__1, &v2t[mini * + v2t_dim1 + 1], &c__1); + } + } + } + + } + + return 0; + +/* End of DBBCSD */ + +} /* dbbcsd_ */ + diff --git a/lapack-netlib/SRC/dbdsdc.c b/lapack-netlib/SRC/dbdsdc.c new file mode 100644 index 000000000..f94b56467 --- /dev/null +++ b/lapack-netlib/SRC/dbdsdc.c @@ -0,0 +1,969 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DBDSDC */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DBDSDC + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DBDSDC( UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, */ +/* WORK, IWORK, INFO ) */ + +/* CHARACTER COMPQ, UPLO */ +/* INTEGER INFO, LDU, LDVT, N */ +/* INTEGER IQ( * ), IWORK( * ) */ +/* DOUBLE PRECISION D( * ), E( * ), Q( * ), U( LDU, * ), */ +/* $ VT( LDVT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DBDSDC computes the singular value decomposition (SVD) of a real */ +/* > N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, */ +/* > using a divide and conquer method, where S is a diagonal matrix */ +/* > with non-negative diagonal elements (the singular values of B), and */ +/* > U and VT are orthogonal matrices of left and right singular vectors, */ +/* > respectively. DBDSDC can be used to compute all singular values, */ +/* > and optionally, singular vectors or singular vectors in compact form. */ +/* > */ +/* > This code makes very mild assumptions about floating point */ +/* > arithmetic. It will work on machines with a guard digit in */ +/* > add/subtract, or on those binary machines without guard digits */ +/* > which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */ +/* > It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. See DLASD3 for details. */ +/* > */ +/* > The code currently calls DLASDQ if singular values only are desired. */ +/* > However, it can be slightly modified to compute singular values */ +/* > using the divide and conquer method. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': B is upper bidiagonal. */ +/* > = 'L': B is lower bidiagonal. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] COMPQ */ +/* > \verbatim */ +/* > COMPQ is CHARACTER*1 */ +/* > Specifies whether singular vectors are to be computed */ +/* > as follows: */ +/* > = 'N': Compute singular values only; */ +/* > = 'P': Compute singular values and compute singular */ +/* > vectors in compact form; */ +/* > = 'I': Compute singular values and singular vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the n diagonal elements of the bidiagonal matrix B. */ +/* > On exit, if INFO=0, the singular values of B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > On entry, the elements of E contain the offdiagonal */ +/* > elements of the bidiagonal matrix whose SVD is desired. */ +/* > On exit, E has been destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is DOUBLE PRECISION array, dimension (LDU,N) */ +/* > If COMPQ = 'I', then: */ +/* > On exit, if INFO = 0, U contains the left singular vectors */ +/* > of the bidiagonal matrix. */ +/* > For other values of COMPQ, U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= 1. */ +/* > If singular vectors are desired, then LDU >= f2cmax( 1, N ). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VT */ +/* > \verbatim */ +/* > VT is DOUBLE PRECISION array, dimension (LDVT,N) */ +/* > If COMPQ = 'I', then: */ +/* > On exit, if INFO = 0, VT**T contains the right singular */ +/* > vectors of the bidiagonal matrix. */ +/* > For other values of COMPQ, VT is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER */ +/* > The leading dimension of the array VT. LDVT >= 1. */ +/* > If singular vectors are desired, then LDVT >= f2cmax( 1, N ). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is DOUBLE PRECISION array, dimension (LDQ) */ +/* > If COMPQ = 'P', then: */ +/* > On exit, if INFO = 0, Q and IQ contain the left */ +/* > and right singular vectors in a compact form, */ +/* > requiring O(N log N) space instead of 2*N**2. */ +/* > In particular, Q contains all the DOUBLE PRECISION data in */ +/* > LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) */ +/* > words of memory, where SMLSIZ is returned by ILAENV and */ +/* > is equal to the maximum size of the subproblems at the */ +/* > bottom of the computation tree (usually about 25). */ +/* > For other values of COMPQ, Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IQ */ +/* > \verbatim */ +/* > IQ is INTEGER array, dimension (LDIQ) */ +/* > If COMPQ = 'P', then: */ +/* > On exit, if INFO = 0, Q and IQ contain the left */ +/* > and right singular vectors in a compact form, */ +/* > requiring O(N log N) space instead of 2*N**2. */ +/* > In particular, IQ contains all INTEGER data in */ +/* > LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) */ +/* > words of memory, where SMLSIZ is returned by ILAENV and */ +/* > is equal to the maximum size of the subproblems at the */ +/* > bottom of the computation tree (usually about 25). */ +/* > For other values of COMPQ, IQ is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > If COMPQ = 'N' then LWORK >= (4 * N). */ +/* > If COMPQ = 'P' then LWORK >= (6 * N). */ +/* > If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (8*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: The algorithm failed to compute a singular value. */ +/* > The update process of divide and conquer failed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Huan Ren, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal * + d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, + integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer difl, difr, ierr, perm, mlvl, sqre, i__, j, k; + doublereal p, r__; + integer z__; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer * + , doublereal *, integer *), dswap_(integer *, doublereal *, + integer *, doublereal *, integer *); + integer poles, iuplo, nsize, start; + extern /* Subroutine */ int dlasd0_(integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *); + integer ic, ii, kk; + doublereal cs; + extern doublereal dlamch_(char *); + extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *); + integer is, iu; + doublereal sn; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlasdq_(char *, integer *, integer + *, integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlaset_(char *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *), dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + integer icompq; + doublereal orgnrm; + integer givnum, givptr, nm1, qstart, smlsiz, wstart, smlszp; + doublereal eps; + integer ivt; + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ +/* Changed dimension statement in comment describing E from (N) to */ +/* (N-1). Sven, 17 Feb 05. */ +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1 * 1; + vt -= vt_offset; + --q; + --iq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + iuplo = 0; + if (lsame_(uplo, "U")) { + iuplo = 1; + } + if (lsame_(uplo, "L")) { + iuplo = 2; + } + if (lsame_(compq, "N")) { + icompq = 0; + } else if (lsame_(compq, "P")) { + icompq = 1; + } else if (lsame_(compq, "I")) { + icompq = 2; + } else { + icompq = -1; + } + if (iuplo == 0) { + *info = -1; + } else if (icompq < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { + *info = -7; + } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DBDSDC", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + smlsiz = ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n == 1) { + if (icompq == 1) { + q[1] = d_sign(&c_b15, &d__[1]); + q[smlsiz * *n + 1] = 1.; + } else if (icompq == 2) { + u[u_dim1 + 1] = d_sign(&c_b15, &d__[1]); + vt[vt_dim1 + 1] = 1.; + } + d__[1] = abs(d__[1]); + return 0; + } + nm1 = *n - 1; + +/* If matrix lower bidiagonal, rotate to be upper bidiagonal */ +/* by applying Givens rotations on the left */ + + wstart = 1; + qstart = 3; + if (icompq == 1) { + dcopy_(n, &d__[1], &c__1, &q[1], &c__1); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); + } + if (iuplo == 2) { + qstart = 5; + if (icompq == 2) { + wstart = (*n << 1) - 1; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (icompq == 1) { + q[i__ + (*n << 1)] = cs; + q[i__ + *n * 3] = sn; + } else if (icompq == 2) { + work[i__] = cs; + work[nm1 + i__] = -sn; + } +/* L10: */ + } + } + +/* If ICOMPQ = 0, use DLASDQ to compute the singular values. */ + + if (icompq == 0) { +/* Ignore WSTART, instead using WORK( 1 ), since the two vectors */ +/* for CS and -SN above are added only if ICOMPQ == 2, */ +/* and adding them exceeds documented WORK size of 4*n. */ + dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + 1], info); + goto L40; + } + +/* If N is smaller than the minimum divide size SMLSIZ, then solve */ +/* the problem with another solver. */ + + if (*n <= smlsiz) { + if (icompq == 2) { + dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); + dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + } else if (icompq == 1) { + iu = 1; + ivt = iu + *n; + dlaset_("A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n); + dlaset_("A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n); + dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( + qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ + iu + (qstart - 1) * *n], n, &work[wstart], info); + } + goto L40; + } + + if (icompq == 2) { + dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); + dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + } + +/* Scale. */ + + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + return 0; + } + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, & + ierr); + + eps = dlamch_("Epsilon") * .9; + + mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / + log(2.)) + 1; + smlszp = smlsiz + 1; + + if (icompq == 1) { + iu = 1; + ivt = smlsiz + 1; + difl = ivt + smlszp; + difr = difl + mlvl; + z__ = difr + (mlvl << 1); + ic = z__ + mlvl; + is = ic + 1; + poles = is + 1; + givnum = poles + (mlvl << 1); + + k = 1; + givptr = 2; + perm = 3; + givcol = perm + mlvl; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_sign(&eps, &d__[i__]); + } +/* L20: */ + } + + start = 1; + sqre = 0; + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + +/* Subproblem found. First determine its size and then */ +/* apply divide and conquer on it. */ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - start + 1; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - start + 1; + } else { + +/* A subproblem with E(NM1) small. This implies an */ +/* 1-by-1 subproblem at D(N). Solve this 1-by-1 problem */ +/* first. */ + + nsize = i__ - start + 1; + if (icompq == 2) { + u[*n + *n * u_dim1] = d_sign(&c_b15, &d__[*n]); + vt[*n + *n * vt_dim1] = 1.; + } else if (icompq == 1) { + q[*n + (qstart - 1) * *n] = d_sign(&c_b15, &d__[*n]); + q[*n + (smlsiz + qstart - 1) * *n] = 1.; + } + d__[*n] = (d__1 = d__[*n], abs(d__1)); + } + if (icompq == 2) { + dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + + start * u_dim1], ldu, &vt[start + start * vt_dim1], + ldvt, &smlsiz, &iwork[1], &work[wstart], info); + } else { + dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ + start], &q[start + (iu + qstart - 2) * *n], n, &q[ + start + (ivt + qstart - 2) * *n], &iq[start + k * *n], + &q[start + (difl + qstart - 2) * *n], &q[start + ( + difr + qstart - 2) * *n], &q[start + (z__ + qstart - + 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ + start + givptr * *n], &iq[start + givcol * *n], n, & + iq[start + perm * *n], &q[start + (givnum + qstart - + 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ + start + (is + qstart - 2) * *n], &work[wstart], & + iwork[1], info); + } + if (*info != 0) { + return 0; + } + start = i__ + 1; + } +/* L30: */ + } + +/* Unscale */ + + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr); +L40: + +/* Use Selection Sort to minimize swaps of singular vectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + kk = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] > p) { + kk = j; + p = d__[j]; + } +/* L50: */ + } + if (kk != i__) { + d__[kk] = d__[i__]; + d__[i__] = p; + if (icompq == 1) { + iq[i__] = kk; + } else if (icompq == 2) { + dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & + c__1); + dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); + } + } else if (icompq == 1) { + iq[i__] = i__; + } +/* L60: */ + } + +/* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ + + if (icompq == 1) { + if (iuplo == 1) { + iq[*n] = 1; + } else { + iq[*n] = 0; + } + } + +/* If B is lower bidiagonal, update U by those Givens rotations */ +/* which rotated B to be upper bidiagonal */ + + if (iuplo == 2 && icompq == 2) { + dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); + } + + return 0; + +/* End of DBDSDC */ + +} /* dbdsdc_ */ + diff --git a/lapack-netlib/SRC/dbdsqr.c b/lapack-netlib/SRC/dbdsqr.c new file mode 100644 index 000000000..d8d2f5b3f --- /dev/null +++ b/lapack-netlib/SRC/dbdsqr.c @@ -0,0 +1,1397 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DBDSQR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DBDSQR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, */ +/* LDU, C, LDC, WORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU */ +/* DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ), */ +/* $ VT( LDVT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DBDSQR computes the singular values and, optionally, the right and/or */ +/* > left singular vectors from the singular value decomposition (SVD) of */ +/* > a real N-by-N (upper or lower) bidiagonal matrix B using the implicit */ +/* > zero-shift QR algorithm. The SVD of B has the form */ +/* > */ +/* > B = Q * S * P**T */ +/* > */ +/* > where S is the diagonal matrix of singular values, Q is an orthogonal */ +/* > matrix of left singular vectors, and P is an orthogonal matrix of */ +/* > right singular vectors. If left singular vectors are requested, this */ +/* > subroutine actually returns U*Q instead of Q, and, if right singular */ +/* > vectors are requested, this subroutine returns P**T*VT instead of */ +/* > P**T, for given real input matrices U and VT. When U and VT are the */ +/* > orthogonal matrices that reduce a general matrix A to bidiagonal */ +/* > form: A = U*B*VT, as computed by DGEBRD, then */ +/* > */ +/* > A = (U*Q) * S * (P**T*VT) */ +/* > */ +/* > is the SVD of A. Optionally, the subroutine may also compute Q**T*C */ +/* > for a given real input matrix C. */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices With */ +/* > Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, */ +/* > LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, */ +/* > no. 5, pp. 873-912, Sept 1990) and */ +/* > "Accurate singular values and differential qd algorithms," by */ +/* > B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics */ +/* > Department, University of California at Berkeley, July 1992 */ +/* > for a detailed description of the algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': B is upper bidiagonal; */ +/* > = 'L': B is lower bidiagonal. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NCVT */ +/* > \verbatim */ +/* > NCVT is INTEGER */ +/* > The number of columns of the matrix VT. NCVT >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRU */ +/* > \verbatim */ +/* > NRU is INTEGER */ +/* > The number of rows of the matrix U. NRU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NCC */ +/* > \verbatim */ +/* > NCC is INTEGER */ +/* > The number of columns of the matrix C. NCC >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the n diagonal elements of the bidiagonal matrix B. */ +/* > On exit, if INFO=0, the singular values of B in decreasing */ +/* > order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > On entry, the N-1 offdiagonal elements of the bidiagonal */ +/* > matrix B. */ +/* > On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E */ +/* > will contain the diagonal and superdiagonal elements of a */ +/* > bidiagonal matrix orthogonally equivalent to the one given */ +/* > as input. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] VT */ +/* > \verbatim */ +/* > VT is DOUBLE PRECISION array, dimension (LDVT, NCVT) */ +/* > On entry, an N-by-NCVT matrix VT. */ +/* > On exit, VT is overwritten by P**T * VT. */ +/* > Not referenced if NCVT = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER */ +/* > The leading dimension of the array VT. */ +/* > LDVT >= f2cmax(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] U */ +/* > \verbatim */ +/* > U is DOUBLE PRECISION array, dimension (LDU, N) */ +/* > On entry, an NRU-by-N matrix U. */ +/* > On exit, U is overwritten by U * Q. */ +/* > Not referenced if NRU = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= f2cmax(1,NRU). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC, NCC) */ +/* > On entry, an N-by-NCC matrix C. */ +/* > On exit, C is overwritten by Q**T * C. */ +/* > Not referenced if NCC = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. */ +/* > LDC >= f2cmax(1,N) if NCC > 0; LDC >=1 if NCC = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (4*(N-1)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > > 0: */ +/* > if NCVT = NRU = NCC = 0, */ +/* > = 1, a split was marked by a positive value in E */ +/* > = 2, current block of Z not diagonalized after 30*N */ +/* > iterations (in inner while loop) */ +/* > = 3, termination criterion of outer while loop not met */ +/* > (program created more than N unreduced blocks) */ +/* > else NCVT = NRU = NCC = 0, */ +/* > the algorithm did not converge; D and E contain the */ +/* > elements of a bidiagonal matrix which is orthogonally */ +/* > similar to the input matrix B; if INFO = i, i */ +/* > elements of E have not converged to zero. */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > TOLMUL DOUBLE PRECISION, default = f2cmax(10,f2cmin(100,EPS**(-1/8))) */ +/* > TOLMUL controls the convergence criterion of the QR loop. */ +/* > If it is positive, TOLMUL*EPS is the desired relative */ +/* > precision in the computed singular values. */ +/* > If it is negative, abs(TOLMUL*EPS*sigma_max) is the */ +/* > desired absolute accuracy in the computed singular */ +/* > values (corresponds to relative accuracy */ +/* > abs(TOLMUL*EPS) in the largest singular value. */ +/* > abs(TOLMUL) should be between 1 and 1/EPS, and preferably */ +/* > between 10 (for fast convergence) and .1/EPS */ +/* > (for there to be some accuracy in the results). */ +/* > Default is to lose at either one eighth or 2 of the */ +/* > available decimal digits in each computed singular value */ +/* > (whichever is smaller). */ +/* > */ +/* > MAXITR INTEGER, default = 6 */ +/* > MAXITR controls the maximum number of passes of the */ +/* > algorithm through its inner loop. The algorithms stops */ +/* > (and so fails to converge) if the number of passes */ +/* > through the inner loop exceeds MAXITR*N**2. */ +/* > */ +/* > \endverbatim */ + +/* > \par Note: */ +/* =========== */ +/* > */ +/* > \verbatim */ +/* > Bug report from Cezary Dendek. */ +/* > On March 23rd 2017, the INTEGER variable MAXIT = MAXITR*N**2 is */ +/* > removed since it can overflow pretty easily (for N larger or equal */ +/* > than 18,919). We instead use MAXITDIVN = MAXITR*N. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup auxOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, + integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * + ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + doublereal abse; + integer idir; + doublereal abss; + integer oldm; + doublereal cosl; + integer isub, iter; + doublereal unfl, sinl, cosr, smin, smax, sinr; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer iterdivn; + extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + doublereal f, g, h__; + integer i__, j, m; + doublereal r__; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + doublereal oldcs; + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *); + integer oldll; + doublereal shift, sigmn, oldsn; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal sminl, sigmx; + logical lower; + integer maxitdivn; + extern /* Subroutine */ int dlasq1_(integer *, doublereal *, doublereal *, + doublereal *, integer *), dlasv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + doublereal cs; + integer ll; + extern doublereal dlamch_(char *); + doublereal sn, mu; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), xerbla_(char *, + integer *, ftnlen); + doublereal sminoa, thresh; + logical rotate; + integer nm1; + doublereal tolmul; + integer nm12, nm13, lll; + doublereal eps, sll, tol; + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1 * 1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + lower = lsame_(uplo, "L"); + if (! lsame_(uplo, "U") && ! lower) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ncvt < 0) { + *info = -3; + } else if (*nru < 0) { + *info = -4; + } else if (*ncc < 0) { + *info = -5; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < f2cmax(1,*n)) { + *info = -9; + } else if (*ldu < f2cmax(1,*nru)) { + *info = -11; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < f2cmax(1,*n)) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DBDSQR", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (*n == 1) { + goto L160; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + +/* If no singular vectors desired, use qd algorithm */ + + if (! rotate) { + dlasq1_(n, &d__[1], &e[1], &work[1], info); + +/* If INFO equals 2, dqds didn't finish, try to finish */ + + if (*info != 2) { + return 0; + } + *info = 0; + } + + nm1 = *n - 1; + nm12 = nm1 + nm1; + nm13 = nm12 + nm1; + idir = 0; + +/* Get machine constants */ + + eps = dlamch_("Epsilon"); + unfl = dlamch_("Safe minimum"); + +/* If matrix lower bidiagonal, rotate to be upper bidiagonal */ +/* by applying Givens rotations on the left */ + + if (lower) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + work[i__] = cs; + work[nm1 + i__] = sn; +/* L10: */ + } + +/* Update singular vectors if desired */ + + if (*nru > 0) { + dlasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], + ldu); + } + if (*ncc > 0) { + dlasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], + ldc); + } + } + +/* Compute singular values to relative accuracy TOL */ +/* (By setting TOL to be negative, algorithm will compute */ +/* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) */ + +/* Computing MAX */ +/* Computing MIN */ + d__3 = 100., d__4 = pow_dd(&eps, &c_b15); + d__1 = 10., d__2 = f2cmin(d__3,d__4); + tolmul = f2cmax(d__1,d__2); + tol = tolmul * eps; + +/* Compute approximate maximum, minimum singular values */ + + smax = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1)); + smax = f2cmax(d__2,d__3); +/* L20: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1)); + smax = f2cmax(d__2,d__3); +/* L30: */ + } + sminl = 0.; + if (tol >= 0.) { + +/* Relative accuracy desired */ + + sminoa = abs(d__[1]); + if (sminoa == 0.) { + goto L50; + } + mu = sminoa; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1] + , abs(d__1)))); + sminoa = f2cmin(sminoa,mu); + if (sminoa == 0.) { + goto L50; + } +/* L40: */ + } +L50: + sminoa /= sqrt((doublereal) (*n)); +/* Computing MAX */ + d__1 = tol * sminoa, d__2 = *n * (*n * unfl) * 6; + thresh = f2cmax(d__1,d__2); + } else { + +/* Absolute accuracy desired */ + +/* Computing MAX */ + d__1 = abs(tol) * smax, d__2 = *n * (*n * unfl) * 6; + thresh = f2cmax(d__1,d__2); + } + +/* Prepare for main iteration loop for the singular values */ +/* (MAXIT is the maximum number of passes through the inner */ +/* loop permitted before nonconvergence signalled.) */ + + maxitdivn = *n * 6; + iterdivn = 0; + iter = -1; + oldll = -1; + oldm = -1; + +/* M points to last element of unconverged part of matrix */ + + m = *n; + +/* Begin main iteration loop */ + +L60: + +/* Check for convergence or exceeding iteration count */ + + if (m <= 1) { + goto L160; + } + + if (iter >= *n) { + iter -= *n; + ++iterdivn; + if (iterdivn >= maxitdivn) { + goto L200; + } + } + +/* Find diagonal block of matrix to work on */ + + if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) { + d__[m] = 0.; + } + smax = (d__1 = d__[m], abs(d__1)); + smin = smax; + i__1 = m - 1; + for (lll = 1; lll <= i__1; ++lll) { + ll = m - lll; + abss = (d__1 = d__[ll], abs(d__1)); + abse = (d__1 = e[ll], abs(d__1)); + if (tol < 0. && abss <= thresh) { + d__[ll] = 0.; + } + if (abse <= thresh) { + goto L80; + } + smin = f2cmin(smin,abss); +/* Computing MAX */ + d__1 = f2cmax(smax,abss); + smax = f2cmax(d__1,abse); +/* L70: */ + } + ll = 0; + goto L90; +L80: + e[ll] = 0.; + +/* Matrix splits since E(LL) = 0 */ + + if (ll == m - 1) { + +/* Convergence of bottom singular value, return to top of loop */ + + --m; + goto L60; + } +L90: + ++ll; + +/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ + + if (ll == m - 1) { + +/* 2 by 2 block, handle separately */ + + dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, + &sinl, &cosl); + d__[m - 1] = sigmx; + e[m - 1] = 0.; + d__[m] = sigmn; + +/* Compute singular vectors, if desired */ + + if (*ncvt > 0) { + drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & + cosr, &sinr); + } + if (*nru > 0) { + drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & + c__1, &cosl, &sinl); + } + if (*ncc > 0) { + drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & + cosl, &sinl); + } + m += -2; + goto L60; + } + +/* If working on new submatrix, choose shift direction */ +/* (from larger end diagonal element towards smaller) */ + + if (ll > oldm || m < oldll) { + if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) { + +/* Chase bulge from top (big end) to bottom (small end) */ + + idir = 1; + } else { + +/* Chase bulge from bottom (big end) to top (small end) */ + + idir = 2; + } + } + +/* Apply convergence tests */ + + if (idir == 1) { + +/* Run convergence test in forward direction */ +/* First apply standard test to bottom of matrix */ + + if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs( + d__1)) || tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) + { + e[m - 1] = 0.; + goto L60; + } + + if (tol >= 0.) { + +/* If relative accuracy desired, */ +/* apply convergence criterion forward */ + + mu = (d__1 = d__[ll], abs(d__1)); + sminl = mu; + i__1 = m - 1; + for (lll = ll; lll <= i__1; ++lll) { + if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { + e[lll] = 0.; + goto L60; + } + mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[ + lll], abs(d__1)))); + sminl = f2cmin(sminl,mu); +/* L100: */ + } + } + + } else { + +/* Run convergence test in backward direction */ +/* First apply standard test to top of matrix */ + + if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1) + ) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) { + e[ll] = 0.; + goto L60; + } + + if (tol >= 0.) { + +/* If relative accuracy desired, */ +/* apply convergence criterion backward */ + + mu = (d__1 = d__[m], abs(d__1)); + sminl = mu; + i__1 = ll; + for (lll = m - 1; lll >= i__1; --lll) { + if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { + e[lll] = 0.; + goto L60; + } + mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll] + , abs(d__1)))); + sminl = f2cmin(sminl,mu); +/* L110: */ + } + } + } + oldll = ll; + oldm = m; + +/* Compute shift. First, test if shifting would ruin relative */ +/* accuracy, and if so set the shift to zero. */ + +/* Computing MAX */ + d__1 = eps, d__2 = tol * .01; + if (tol >= 0. && *n * tol * (sminl / smax) <= f2cmax(d__1,d__2)) { + +/* Use a zero shift to avoid loss of relative accuracy */ + + shift = 0.; + } else { + +/* Compute the shift from 2-by-2 block at end of matrix */ + + if (idir == 1) { + sll = (d__1 = d__[ll], abs(d__1)); + dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); + } else { + sll = (d__1 = d__[m], abs(d__1)); + dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); + } + +/* Test if shift negligible, and if so set to zero */ + + if (sll > 0.) { +/* Computing 2nd power */ + d__1 = shift / sll; + if (d__1 * d__1 < eps) { + shift = 0.; + } + } + } + +/* Increment iteration count */ + + iter = iter + m - ll; + +/* If SHIFT = 0, do simplified QR iteration */ + + if (shift == 0.) { + if (idir == 1) { + +/* Chase bulge from top to bottom */ +/* Save cosines and sines for later singular vector updates */ + + cs = 1.; + oldcs = 1.; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + d__1 = d__[i__] * cs; + dlartg_(&d__1, &e[i__], &cs, &sn, &r__); + if (i__ > ll) { + e[i__ - 1] = oldsn * r__; + } + d__1 = oldcs * r__; + d__2 = d__[i__ + 1] * sn; + dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll + 1] = cs; + work[i__ - ll + 1 + nm1] = sn; + work[i__ - ll + 1 + nm12] = oldcs; + work[i__ - ll + 1 + nm13] = oldsn; +/* L120: */ + } + h__ = d__[m] * cs; + d__[m] = h__ * oldcs; + e[m - 1] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { + e[m - 1] = 0.; + } + + } else { + +/* Chase bulge from bottom to top */ +/* Save cosines and sines for later singular vector updates */ + + cs = 1.; + oldcs = 1.; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + d__1 = d__[i__] * cs; + dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__); + if (i__ < m) { + e[i__] = oldsn * r__; + } + d__1 = oldcs * r__; + d__2 = d__[i__ - 1] * sn; + dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll] = cs; + work[i__ - ll + nm1] = -sn; + work[i__ - ll + nm12] = oldcs; + work[i__ - ll + nm13] = -oldsn; +/* L130: */ + } + h__ = d__[ll] * cs; + d__[ll] = h__ * oldcs; + e[ll] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((d__1 = e[ll], abs(d__1)) <= thresh) { + e[ll] = 0.; + } + } + } else { + +/* Use nonzero shift */ + + if (idir == 1) { + +/* Chase bulge from top to bottom */ +/* Save cosines and sines for later singular vector updates */ + + f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[ + ll]) + shift / d__[ll]); + g = e[ll]; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + dlartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ > ll) { + e[i__ - 1] = r__; + } + f = cosr * d__[i__] + sinr * e[i__]; + e[i__] = cosr * e[i__] - sinr * d__[i__]; + g = sinr * d__[i__ + 1]; + d__[i__ + 1] = cosr * d__[i__ + 1]; + dlartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__] + sinl * d__[i__ + 1]; + d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; + if (i__ < m - 1) { + g = sinl * e[i__ + 1]; + e[i__ + 1] = cosl * e[i__ + 1]; + } + work[i__ - ll + 1] = cosr; + work[i__ - ll + 1 + nm1] = sinr; + work[i__ - ll + 1 + nm12] = cosl; + work[i__ - ll + 1 + nm13] = sinl; +/* L140: */ + } + e[m - 1] = f; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { + e[m - 1] = 0.; + } + + } else { + +/* Chase bulge from bottom to top */ +/* Save cosines and sines for later singular vector updates */ + + f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[m] + ) + shift / d__[m]); + g = e[m - 1]; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + dlartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ < m) { + e[i__] = r__; + } + f = cosr * d__[i__] + sinr * e[i__ - 1]; + e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; + g = sinr * d__[i__ - 1]; + d__[i__ - 1] = cosr * d__[i__ - 1]; + dlartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; + d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; + if (i__ > ll + 1) { + g = sinl * e[i__ - 2]; + e[i__ - 2] = cosl * e[i__ - 2]; + } + work[i__ - ll] = cosr; + work[i__ - ll + nm1] = -sinr; + work[i__ - ll + nm12] = cosl; + work[i__ - ll + nm13] = -sinl; +/* L150: */ + } + e[ll] = f; + +/* Test convergence */ + + if ((d__1 = e[ll], abs(d__1)) <= thresh) { + e[ll] = 0.; + } + +/* Update singular vectors if desired */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + } + } + +/* QR iteration finished, go back and check convergence */ + + goto L60; + +/* All singular values converged, so make them positive */ + +L160: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] < 0.) { + d__[i__] = -d__[i__]; + +/* Change sign of singular vectors, if desired */ + + if (*ncvt > 0) { + dscal_(ncvt, &c_b72, &vt[i__ + vt_dim1], ldvt); + } + } +/* L170: */ + } + +/* Sort the singular values into decreasing order (insertion sort on */ +/* singular values, but only one transposition per singular vector) */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I) */ + + isub = 1; + smin = d__[1]; + i__2 = *n + 1 - i__; + for (j = 2; j <= i__2; ++j) { + if (d__[j] <= smin) { + isub = j; + smin = d__[j]; + } +/* L180: */ + } + if (isub != *n + 1 - i__) { + +/* Swap singular values and vectors */ + + d__[isub] = d__[*n + 1 - i__]; + d__[*n + 1 - i__] = smin; + if (*ncvt > 0) { + dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + + vt_dim1], ldvt); + } + if (*nru > 0) { + dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * + u_dim1 + 1], &c__1); + } + if (*ncc > 0) { + dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + + c_dim1], ldc); + } + } +/* L190: */ + } + goto L220; + +/* Maximum number of iterations exceeded, failure to converge */ + +L200: + *info = 0; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L210: */ + } +L220: + return 0; + +/* End of DBDSQR */ + +} /* dbdsqr_ */ + diff --git a/lapack-netlib/SRC/dbdsvdx.c b/lapack-netlib/SRC/dbdsvdx.c new file mode 100644 index 000000000..4d2f51eae --- /dev/null +++ b/lapack-netlib/SRC/dbdsvdx.c @@ -0,0 +1,1349 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) ceil(w) +#define myhuge_(w) HUGE_VAL +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DBDSVDX */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DBDSVDX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DBDSVDX( UPLO, JOBZ, RANGE, N, D, E, VL, VU, IL, IU, */ +/* $ NS, S, Z, LDZ, WORK, IWORK, INFO ) */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, LDZ, N, NS */ +/* DOUBLE PRECISION VL, VU */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION D( * ), E( * ), S( * ), WORK( * ), */ +/* Z( LDZ, * ) */ + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DBDSVDX computes the singular value decomposition (SVD) of a real */ +/* > N-by-N (upper or lower) bidiagonal matrix B, B = U * S * VT, */ +/* > where S is a diagonal matrix with non-negative diagonal elements */ +/* > (the singular values of B), and U and VT are orthogonal matrices */ +/* > of left and right singular vectors, respectively. */ +/* > */ +/* > Given an upper bidiagonal B with diagonal D = [ d_1 d_2 ... d_N ] */ +/* > and superdiagonal E = [ e_1 e_2 ... e_N-1 ], DBDSVDX computes the */ +/* > singular value decompositon of B through the eigenvalues and */ +/* > eigenvectors of the N*2-by-N*2 tridiagonal matrix */ +/* > */ +/* > | 0 d_1 | */ +/* > | d_1 0 e_1 | */ +/* > TGK = | e_1 0 d_2 | */ +/* > | d_2 . . | */ +/* > | . . . | */ +/* > */ +/* > If (s,u,v) is a singular triplet of B with ||u|| = ||v|| = 1, then */ +/* > (+/-s,q), ||q|| = 1, are eigenpairs of TGK, with q = P * ( u' +/-v' ) / */ +/* > sqrt(2) = ( v_1 u_1 v_2 u_2 ... v_n u_n ) / sqrt(2), and */ +/* > P = [ e_{n+1} e_{1} e_{n+2} e_{2} ... ]. */ +/* > */ +/* > Given a TGK matrix, one can either a) compute -s,-v and change signs */ +/* > so that the singular values (and corresponding vectors) are already in */ +/* > descending order (as in DGESVD/DGESDD) or b) compute s,v and reorder */ +/* > the values (and corresponding vectors). DBDSVDX implements a) by */ +/* > calling DSTEVX (bisection plus inverse iteration, to be replaced */ +/* > with a version of the Multiple Relative Robust Representation */ +/* > algorithm. (See P. Willems and B. Lang, A framework for the MR^3 */ +/* > algorithm: theory and implementation, SIAM J. Sci. Comput., */ +/* > 35:740-766, 2013.) */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': B is upper bidiagonal; */ +/* > = 'L': B is lower bidiagonal. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute singular values only; */ +/* > = 'V': Compute singular values and singular vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all singular values will be found. */ +/* > = 'V': all singular values in the half-open interval [VL,VU) */ +/* > will be found. */ +/* > = 'I': the IL-th through IU-th singular values will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the bidiagonal matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > The n diagonal elements of the bidiagonal matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (f2cmax(1,N-1)) */ +/* > The (n-1) superdiagonal elements of the bidiagonal matrix */ +/* > B in elements 1 to N-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for singular values. VU > VL. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is DOUBLE PRECISION */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for singular values. VU > VL. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest singular value to be returned. */ +/* > 1 <= IL <= IU <= f2cmin(M,N), if f2cmin(M,N) > 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest singular value to be returned. */ +/* > 1 <= IL <= IU <= f2cmin(M,N), if f2cmin(M,N) > 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] NS */ +/* > \verbatim */ +/* > NS is INTEGER */ +/* > The total number of singular values found. 0 <= NS <= N. */ +/* > If RANGE = 'A', NS = N, and if RANGE = 'I', NS = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (N) */ +/* > The first NS elements contain the selected singular values in */ +/* > ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension (2*N,K) */ +/* > If JOBZ = 'V', then if INFO = 0 the first NS columns of Z */ +/* > contain the singular vectors of the matrix B corresponding to */ +/* > the selected singular values, with U in rows 1 to N and V */ +/* > in rows N+1 to N*2, i.e. */ +/* > Z = [ U ] */ +/* > [ V ] */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > Note: The user must ensure that at least K = NS+1 columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of */ +/* > NS is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(2,N*2). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (14*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (12*N) */ +/* > If JOBZ = 'V', then if INFO = 0, the first NS elements of */ +/* > IWORK are zero. If INFO > 0, then IWORK contains the indices */ +/* > of the eigenvectors that failed to converge in DSTEVX. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, then i eigenvectors failed to converge */ +/* > in DSTEVX. The indices of the eigenvectors */ +/* > (as returned by DSTEVX) are stored in the */ +/* > array IWORK. */ +/* > if INFO = N*2 + 1, an internal error occurred. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup doubleOTHEReigen */ + +/* ===================================================================== */ +/* Subroutine */ int dbdsvdx_(char *uplo, char *jobz, char *range, integer *n, + doublereal *d__, doublereal *e, doublereal *vl, doublereal *vu, + integer *il, integer *iu, integer *ns, doublereal *s, doublereal *z__, + integer *ldz, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + doublereal emin; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + integer ntgk; + doublereal smin, smax, nrmu, nrmv; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + logical sveq0; + integer i__, idbeg, j, k; + doublereal sqrt2; + integer idend; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + integer isbeg; + extern logical lsame_(char *, char *); + integer idtgk, ietgk, iltgk, itemp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer icolz; + logical allsv; + integer idptr; + logical indsv; + integer ieptr, iutgk; + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + doublereal vltgk; + logical lower; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal zjtji; + logical split, valsv; + integer isplt; + doublereal ortol, vutgk; + logical wantz; + char rngvx[1]; + integer irowu, irowv, irowz; + extern doublereal dlamch_(char *); + integer iifail; + doublereal mu; + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *, ftnlen); + doublereal abstol, thresh; + integer iiwork; + extern /* Subroutine */ int dstevx_(char *, char *, integer *, doublereal + *, doublereal *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *, integer *), + mecago_(); + doublereal eps; + integer nsl; + doublereal tol, ulp; + integer nru, nrv; + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + --s; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --iwork; + + /* Function Body */ + allsv = lsame_(range, "A"); + valsv = lsame_(range, "V"); + indsv = lsame_(range, "I"); + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + + *info = 0; + if (! lsame_(uplo, "U") && ! lower) { + *info = -1; + } else if (! (wantz || lsame_(jobz, "N"))) { + *info = -2; + } else if (! (allsv || valsv || indsv)) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*n > 0) { + if (valsv) { + if (*vl < 0.) { + *info = -7; + } else if (*vu <= *vl) { + *info = -8; + } + } else if (indsv) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -9; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -10; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n << 1) { + *info = -14; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DBDSVDX", &i__1, (ftnlen)7); + return 0; + } + +/* Quick return if possible (N.LE.1) */ + + *ns = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (allsv || indsv) { + *ns = 1; + s[1] = abs(d__[1]); + } else { + if (*vl < abs(d__[1]) && *vu >= abs(d__[1])) { + *ns = 1; + s[1] = abs(d__[1]); + } + } + if (wantz) { + z__[z_dim1 + 1] = d_sign(&c_b10, &d__[1]); + z__[z_dim1 + 2] = 1.; + } + return 0; + } + + abstol = dlamch_("Safe Minimum") * 2; + ulp = dlamch_("Precision"); + eps = dlamch_("Epsilon"); + sqrt2 = sqrt(2.); + ortol = sqrt(ulp); + +/* Criterion for splitting is taken from DBDSQR when singular */ +/* values are computed to relative accuracy TOL. (See J. Demmel and */ +/* W. Kahan, Accurate singular values of bidiagonal matrices, SIAM */ +/* J. Sci. and Stat. Comput., 11:873–912, 1990.) */ + +/* Computing MAX */ +/* Computing MIN */ + d__3 = 100., d__4 = pow_dd(&eps, &c_b14); + d__1 = 10., d__2 = f2cmin(d__3,d__4); + tol = f2cmax(d__1,d__2) * eps; + +/* Compute approximate maximum, minimum singular values. */ + + i__ = idamax_(n, &d__[1], &c__1); + smax = (d__1 = d__[i__], abs(d__1)); + i__1 = *n - 1; + i__ = idamax_(&i__1, &e[1], &c__1); +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1)); + smax = f2cmax(d__2,d__3); + +/* Compute threshold for neglecting D's and E's. */ + + smin = abs(d__[1]); + if (smin != 0.) { + mu = smin; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1] + , abs(d__1)))); + smin = f2cmin(smin,mu); + if (smin == 0.) { + myexit_(); + } + } + } + smin /= sqrt((doublereal) (*n)); + thresh = tol * smin; + +/* Check for zeros in D and E (splits), i.e. submatrices. */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) <= thresh) { + d__[i__] = 0.; + } + if ((d__1 = e[i__], abs(d__1)) <= thresh) { + e[i__] = 0.; + } + } + if ((d__1 = d__[*n], abs(d__1)) <= thresh) { + d__[*n] = 0.; + } + +/* Pointers for arrays used by DSTEVX. */ + + idtgk = 1; + ietgk = idtgk + (*n << 1); + itemp = ietgk + (*n << 1); + iifail = 1; + iiwork = iifail + (*n << 1); + +/* Set RNGVX, which corresponds to RANGE for DSTEVX in TGK mode. */ +/* VL,VU or IL,IU are redefined to conform to implementation a) */ +/* described in the leading comments. */ + + iltgk = 0; + iutgk = 0; + vltgk = 0.; + vutgk = 0.; + + if (allsv) { + +/* All singular values will be found. We aim at -s (see */ +/* leading comments) with RNGVX = 'I'. IL and IU are set */ +/* later (as ILTGK and IUTGK) according to the dimension */ +/* of the active submatrix. */ + + *(unsigned char *)rngvx = 'I'; + if (wantz) { + i__1 = *n << 1; + i__2 = *n + 1; + dlaset_("F", &i__1, &i__2, &c_b19, &c_b19, &z__[z_offset], ldz); + } + } else if (valsv) { + +/* Find singular values in a half-open interval. We aim */ +/* at -s (see leading comments) and we swap VL and VU */ +/* (as VUTGK and VLTGK), changing their signs. */ + + *(unsigned char *)rngvx = 'V'; + vltgk = -(*vu); + vutgk = -(*vl); + i__1 = idtgk + (*n << 1) - 1; + for (i__ = idtgk; i__ <= i__1; ++i__) { + work[i__] = 0.; + } +/* WORK( IDTGK:IDTGK+2*N-1 ) = ZERO */ + dcopy_(n, &d__[1], &c__1, &work[ietgk], &c__2); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &work[ietgk + 1], &c__2); + i__1 = *n << 1; + dstevx_("N", "V", &i__1, &work[idtgk], &work[ietgk], &vltgk, &vutgk, & + iltgk, &iltgk, &abstol, ns, &s[1], &z__[z_offset], ldz, &work[ + itemp], &iwork[iiwork], &iwork[iifail], info); + if (*ns == 0) { + return 0; + } else { + if (wantz) { + i__1 = *n << 1; + dlaset_("F", &i__1, ns, &c_b19, &c_b19, &z__[z_offset], ldz); + } + } + } else if (indsv) { + +/* Find the IL-th through the IU-th singular values. We aim */ +/* at -s (see leading comments) and indices are mapped into */ +/* values, therefore mimicking DSTEBZ, where */ + +/* GL = GL - FUDGE*TNORM*ULP*N - FUDGE*TWO*PIVMIN */ +/* GU = GU + FUDGE*TNORM*ULP*N + FUDGE*PIVMIN */ + + iltgk = *il; + iutgk = *iu; + *(unsigned char *)rngvx = 'V'; + i__1 = idtgk + (*n << 1) - 1; + for (i__ = idtgk; i__ <= i__1; ++i__) { + work[i__] = 0.; + } +/* WORK( IDTGK:IDTGK+2*N-1 ) = ZERO */ + dcopy_(n, &d__[1], &c__1, &work[ietgk], &c__2); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &work[ietgk + 1], &c__2); + i__1 = *n << 1; + dstevx_("N", "I", &i__1, &work[idtgk], &work[ietgk], &vltgk, &vltgk, & + iltgk, &iltgk, &abstol, ns, &s[1], &z__[z_offset], ldz, &work[ + itemp], &iwork[iiwork], &iwork[iifail], info); + vltgk = s[1] - smax * 2. * ulp * *n; + i__1 = idtgk + (*n << 1) - 1; + for (i__ = idtgk; i__ <= i__1; ++i__) { + work[i__] = 0.; + } +/* WORK( IDTGK:IDTGK+2*N-1 ) = ZERO */ + dcopy_(n, &d__[1], &c__1, &work[ietgk], &c__2); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &work[ietgk + 1], &c__2); + i__1 = *n << 1; + dstevx_("N", "I", &i__1, &work[idtgk], &work[ietgk], &vutgk, &vutgk, & + iutgk, &iutgk, &abstol, ns, &s[1], &z__[z_offset], ldz, &work[ + itemp], &iwork[iiwork], &iwork[iifail], info); + vutgk = s[1] + smax * 2. * ulp * *n; + vutgk = f2cmin(vutgk,0.); + +/* If VLTGK=VUTGK, DSTEVX returns an error message, */ +/* so if needed we change VUTGK slightly. */ + + if (vltgk == vutgk) { + vltgk -= tol; + } + + if (wantz) { + i__1 = *n << 1; + i__2 = *iu - *il + 1; + dlaset_("F", &i__1, &i__2, &c_b19, &c_b19, &z__[z_offset], ldz); + } + } + +/* Initialize variables and pointers for S, Z, and WORK. */ + +/* NRU, NRV: number of rows in U and V for the active submatrix */ +/* IDBEG, ISBEG: offsets for the entries of D and S */ +/* IROWZ, ICOLZ: offsets for the rows and columns of Z */ +/* IROWU, IROWV: offsets for the rows of U and V */ + + *ns = 0; + nru = 0; + nrv = 0; + idbeg = 1; + isbeg = 1; + irowz = 1; + icolz = 1; + irowu = 2; + irowv = 1; + split = FALSE_; + sveq0 = FALSE_; + +/* Form the tridiagonal TGK matrix. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + s[i__] = 0.; + } +/* S( 1:N ) = ZERO */ + work[ietgk + (*n << 1) - 1] = 0.; + i__1 = idtgk + (*n << 1) - 1; + for (i__ = idtgk; i__ <= i__1; ++i__) { + work[i__] = 0.; + } +/* WORK( IDTGK:IDTGK+2*N-1 ) = ZERO */ + dcopy_(n, &d__[1], &c__1, &work[ietgk], &c__2); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &work[ietgk + 1], &c__2); + + +/* Check for splits in two levels, outer level */ +/* in E and inner level in D. */ + + i__1 = *n << 1; + for (ieptr = 2; ieptr <= i__1; ieptr += 2) { + if (work[ietgk + ieptr - 1] == 0.) { + +/* Split in E (this piece of B is square) or bottom */ +/* of the (input bidiagonal) matrix. */ + + isplt = idbeg; + idend = ieptr - 1; + i__2 = idend; + for (idptr = idbeg; idptr <= i__2; idptr += 2) { + if (work[ietgk + idptr - 1] == 0.) { + +/* Split in D (rectangular submatrix). Set the number */ +/* of rows in U and V (NRU and NRV) accordingly. */ + + if (idptr == idbeg) { + +/* D=0 at the top. */ + + sveq0 = TRUE_; + if (idbeg == idend) { + nru = 1; + nrv = 1; + } + } else if (idptr == idend) { + +/* D=0 at the bottom. */ + + sveq0 = TRUE_; + nru = (idend - isplt) / 2 + 1; + nrv = nru; + if (isplt != idbeg) { + ++nru; + } + } else { + if (isplt == idbeg) { + +/* Split: top rectangular submatrix. */ + + nru = (idptr - idbeg) / 2; + nrv = nru + 1; + } else { + +/* Split: middle square submatrix. */ + + nru = (idptr - isplt) / 2 + 1; + nrv = nru; + } + } + } else if (idptr == idend) { + +/* Last entry of D in the active submatrix. */ + + if (isplt == idbeg) { + +/* No split (trivial case). */ + + nru = (idend - idbeg) / 2 + 1; + nrv = nru; + } else { + +/* Split: bottom rectangular submatrix. */ + + nrv = (idend - isplt) / 2 + 1; + nru = nrv + 1; + } + } + + ntgk = nru + nrv; + + if (ntgk > 0) { + +/* Compute eigenvalues/vectors of the active */ +/* submatrix according to RANGE: */ +/* if RANGE='A' (ALLSV) then RNGVX = 'I' */ +/* if RANGE='V' (VALSV) then RNGVX = 'V' */ +/* if RANGE='I' (INDSV) then RNGVX = 'V' */ + + iltgk = 1; + iutgk = ntgk / 2; + if (allsv || vutgk == 0.) { + if (sveq0 || smin < eps || ntgk % 2 > 0) { +/* Special case: eigenvalue equal to zero or very */ +/* small, additional eigenvector is needed. */ + ++iutgk; + } + } + +/* Workspace needed by DSTEVX: */ +/* WORK( ITEMP: ): 2*5*NTGK */ +/* IWORK( 1: ): 2*6*NTGK */ + + dstevx_(jobz, rngvx, &ntgk, &work[idtgk + isplt - 1], & + work[ietgk + isplt - 1], &vltgk, &vutgk, &iltgk, & + iutgk, &abstol, &nsl, &s[isbeg], &z__[irowz + + icolz * z_dim1], ldz, &work[itemp], &iwork[iiwork] + , &iwork[iifail], info); + if (*info != 0) { +/* Exit with the error code from DSTEVX. */ + return 0; + } + emin = (d__1 = s[isbeg], abs(d__1)); + i__3 = isbeg + nsl - 1; + for (i__ = isbeg; i__ <= i__3; ++i__) { + if ((d__1 = s[i__], abs(d__1)) > emin) { + emin = s[i__]; + } + } +/* EMIN = ABS( MAXVAL( S( ISBEG:ISBEG+NSL-1 ) ) ) */ + + if (nsl > 0 && wantz) { + +/* Normalize u=Z([2,4,...],:) and v=Z([1,3,...],:), */ +/* changing the sign of v as discussed in the leading */ +/* comments. The norms of u and v may be (slightly) */ +/* different from 1/sqrt(2) if the corresponding */ +/* eigenvalues are very small or too close. We check */ +/* those norms and, if needed, reorthogonalize the */ +/* vectors. */ + + if (nsl > 1 && vutgk == 0. && ntgk % 2 == 0 && emin == + 0. && ! split) { + +/* D=0 at the top or bottom of the active submatrix: */ +/* one eigenvalue is equal to zero; concatenate the */ +/* eigenvectors corresponding to the two smallest */ +/* eigenvalues. */ + + i__3 = irowz + ntgk - 1; + for (i__ = irowz; i__ <= i__3; ++i__) { + z__[i__ + (icolz + nsl - 2) * z_dim1] += z__[ + i__ + (icolz + nsl - 1) * z_dim1]; + z__[i__ + (icolz + nsl - 1) * z_dim1] = 0.; + } +/* Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-2 ) = */ +/* $ Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-2 ) + */ +/* $ Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-1 ) */ +/* Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-1 ) = */ +/* $ ZERO */ +/* IF( IUTGK*2.GT.NTGK ) THEN */ +/* Eigenvalue equal to zero or very small. */ +/* NSL = NSL - 1 */ +/* END IF */ + } + +/* Computing MIN */ + i__4 = nsl - 1, i__5 = nru - 1; + i__3 = f2cmin(i__4,i__5); + for (i__ = 0; i__ <= i__3; ++i__) { + nrmu = dnrm2_(&nru, &z__[irowu + (icolz + i__) * + z_dim1], &c__2); + if (nrmu == 0.) { + *info = (*n << 1) + 1; + return 0; + } + d__1 = 1. / nrmu; + dscal_(&nru, &d__1, &z__[irowu + (icolz + i__) * + z_dim1], &c__2); + if (nrmu != 1. && (d__1 = nrmu - ortol, abs(d__1)) + * sqrt2 > 1.) { + i__4 = i__ - 1; + for (j = 0; j <= i__4; ++j) { + zjtji = -ddot_(&nru, &z__[irowu + (icolz + + j) * z_dim1], &c__2, &z__[irowu + + (icolz + i__) * z_dim1], &c__2); + daxpy_(&nru, &zjtji, &z__[irowu + (icolz + + j) * z_dim1], &c__2, &z__[irowu + + (icolz + i__) * z_dim1], &c__2); + } + nrmu = dnrm2_(&nru, &z__[irowu + (icolz + i__) + * z_dim1], &c__2); + d__1 = 1. / nrmu; + dscal_(&nru, &d__1, &z__[irowu + (icolz + i__) + * z_dim1], &c__2); + } + } +/* Computing MIN */ + i__4 = nsl - 1, i__5 = nrv - 1; + i__3 = f2cmin(i__4,i__5); + for (i__ = 0; i__ <= i__3; ++i__) { + nrmv = dnrm2_(&nrv, &z__[irowv + (icolz + i__) * + z_dim1], &c__2); + if (nrmv == 0.) { + *info = (*n << 1) + 1; + return 0; + } + d__1 = -1. / nrmv; + dscal_(&nrv, &d__1, &z__[irowv + (icolz + i__) * + z_dim1], &c__2); + if (nrmv != 1. && (d__1 = nrmv - ortol, abs(d__1)) + * sqrt2 > 1.) { + i__4 = i__ - 1; + for (j = 0; j <= i__4; ++j) { + zjtji = -ddot_(&nrv, &z__[irowv + (icolz + + j) * z_dim1], &c__2, &z__[irowv + + (icolz + i__) * z_dim1], &c__2); + daxpy_(&nru, &zjtji, &z__[irowv + (icolz + + j) * z_dim1], &c__2, &z__[irowv + + (icolz + i__) * z_dim1], &c__2); + } + nrmv = dnrm2_(&nrv, &z__[irowv + (icolz + i__) + * z_dim1], &c__2); + d__1 = 1. / nrmv; + dscal_(&nrv, &d__1, &z__[irowv + (icolz + i__) + * z_dim1], &c__2); + } + } + if (vutgk == 0. && idptr < idend && ntgk % 2 > 0) { + +/* D=0 in the middle of the active submatrix (one */ +/* eigenvalue is equal to zero): save the corresponding */ +/* eigenvector for later use (when bottom of the */ +/* active submatrix is reached). */ + + split = TRUE_; + i__3 = irowz + ntgk - 1; + for (i__ = irowz; i__ <= i__3; ++i__) { + z__[i__ + (*n + 1) * z_dim1] = z__[i__ + (*ns + + nsl) * z_dim1]; + z__[i__ + (*ns + nsl) * z_dim1] = 0.; + } +/* Z( IROWZ:IROWZ+NTGK-1,N+1 ) = */ +/* $ Z( IROWZ:IROWZ+NTGK-1,NS+NSL ) */ +/* Z( IROWZ:IROWZ+NTGK-1,NS+NSL ) = */ +/* $ ZERO */ + } + } + +/* ** WANTZ **! */ + nsl = f2cmin(nsl,nru); + sveq0 = FALSE_; + +/* Absolute values of the eigenvalues of TGK. */ + + i__3 = nsl - 1; + for (i__ = 0; i__ <= i__3; ++i__) { + s[isbeg + i__] = (d__1 = s[isbeg + i__], abs(d__1)); + } + +/* Update pointers for TGK, S and Z. */ + + isbeg += nsl; + irowz += ntgk; + icolz += nsl; + irowu = irowz; + irowv = irowz + 1; + isplt = idptr + 1; + *ns += nsl; + nru = 0; + nrv = 0; + } +/* ** NTGK.GT.0 **! */ + if (irowz < *n << 1 && wantz) { + i__3 = irowz - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + z__[i__ + icolz * z_dim1] = 0.; + } +/* Z( 1:IROWZ-1, ICOLZ ) = ZERO */ + } + } +/* ** IDPTR loop **! */ + if (split && wantz) { + +/* Bring back eigenvector corresponding */ +/* to eigenvalue equal to zero. */ + + i__2 = idend - ntgk + 1; + for (i__ = idbeg; i__ <= i__2; ++i__) { + z__[i__ + (isbeg - 1) * z_dim1] += z__[i__ + (*n + 1) * + z_dim1]; + z__[i__ + (*n + 1) * z_dim1] = 0.; + } +/* Z( IDBEG:IDEND-NTGK+1,ISBEG-1 ) = */ +/* $ Z( IDBEG:IDEND-NTGK+1,ISBEG-1 ) + */ +/* $ Z( IDBEG:IDEND-NTGK+1,N+1 ) */ +/* Z( IDBEG:IDEND-NTGK+1,N+1 ) = 0 */ + } + --irowv; + ++irowu; + idbeg = ieptr + 1; + sveq0 = FALSE_; + split = FALSE_; + } +/* ** Check for split in E **! */ + } + +/* Sort the singular values into decreasing order (insertion sort on */ +/* singular values, but only one transposition per singular vector) */ + +/* ** IEPTR loop **! */ + i__1 = *ns - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + k = 1; + smin = s[1]; + i__2 = *ns + 1 - i__; + for (j = 2; j <= i__2; ++j) { + if (s[j] <= smin) { + k = j; + smin = s[j]; + } + } + if (k != *ns + 1 - i__) { + s[k] = s[*ns + 1 - i__]; + s[*ns + 1 - i__] = smin; + if (wantz) { + i__2 = *n << 1; + dswap_(&i__2, &z__[k * z_dim1 + 1], &c__1, &z__[(*ns + 1 - + i__) * z_dim1 + 1], &c__1); + } + } + } + +/* If RANGE=I, check for singular values/vectors to be discarded. */ + + if (indsv) { + k = *iu - *il + 1; + if (k < *ns) { + i__1 = *ns; + for (i__ = k + 1; i__ <= i__1; ++i__) { + s[i__] = 0.; + } +/* S( K+1:NS ) = ZERO */ + if (wantz) { + i__1 = *n << 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *ns; + for (j = k + 1; j <= i__2; ++j) { + z__[i__ + j * z_dim1] = 0.; + } + } +/* Z( 1:N*2,K+1:NS ) = ZERO */ + } + *ns = k; + } + } + +/* Reorder Z: U = Z( 1:N,1:NS ), V = Z( N+1:N*2,1:NS ). */ +/* If B is a lower diagonal, swap U and V. */ + + if (wantz) { + i__1 = *ns; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n << 1; + dcopy_(&i__2, &z__[i__ * z_dim1 + 1], &c__1, &work[1], &c__1); + if (lower) { + dcopy_(n, &work[2], &c__2, &z__[*n + 1 + i__ * z_dim1], &c__1) + ; + dcopy_(n, &work[1], &c__2, &z__[i__ * z_dim1 + 1], &c__1); + } else { + dcopy_(n, &work[2], &c__2, &z__[i__ * z_dim1 + 1], &c__1); + dcopy_(n, &work[1], &c__2, &z__[*n + 1 + i__ * z_dim1], &c__1) + ; + } + } + } + + return 0; + +/* End of DBDSVDX */ + +} /* dbdsvdx_ */ + diff --git a/lapack-netlib/SRC/dcombssq.c b/lapack-netlib/SRC/dcombssq.c new file mode 100644 index 000000000..0990d084c --- /dev/null +++ b/lapack-netlib/SRC/dcombssq.c @@ -0,0 +1,486 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DCOMBSSQ adds two scaled sum of squares quantities. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DCOMBSSQ( V1, V2 ) */ + +/* DOUBLE PRECISION V1( 2 ), V2( 2 ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DCOMBSSQ adds two scaled sum of squares quantities, V1 := V1 + V2. */ +/* > That is, */ +/* > */ +/* > V1_scale**2 * V1_sumsq := V1_scale**2 * V1_sumsq */ +/* > + V2_scale**2 * V2_sumsq */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in,out] V1 */ +/* > \verbatim */ +/* > V1 is DOUBLE PRECISION array, dimension (2). */ +/* > The first scaled sum. */ +/* > V1(1) = V1_scale, V1(2) = V1_sumsq. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V2 */ +/* > \verbatim */ +/* > V2 is DOUBLE PRECISION array, dimension (2). */ +/* > The second scaled sum. */ +/* > V2(1) = V2_scale, V2(2) = V2_sumsq. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2018 */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int dcombssq_(doublereal *v1, doublereal *v2) +{ + /* System generated locals */ + doublereal d__1; + + +/* -- LAPACK auxiliary routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2018 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --v2; + --v1; + + /* Function Body */ + if (v1[1] >= v2[1]) { + if (v1[1] != 0.) { +/* Computing 2nd power */ + d__1 = v2[1] / v1[1]; + v1[2] += d__1 * d__1 * v2[2]; + } else { + v1[2] += v2[2]; + } + } else { +/* Computing 2nd power */ + d__1 = v1[1] / v2[1]; + v1[2] = v2[2] + d__1 * d__1 * v1[2]; + v1[1] = v2[1]; + } + return 0; + +/* End of DCOMBSSQ */ + +} /* dcombssq_ */ + diff --git a/lapack-netlib/SRC/ddisna.c b/lapack-netlib/SRC/ddisna.c new file mode 100644 index 000000000..e34d9a03c --- /dev/null +++ b/lapack-netlib/SRC/ddisna.c @@ -0,0 +1,651 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DDISNA */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DDISNA + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DDISNA( JOB, M, N, D, SEP, INFO ) */ + +/* CHARACTER JOB */ +/* INTEGER INFO, M, N */ +/* DOUBLE PRECISION D( * ), SEP( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DDISNA computes the reciprocal condition numbers for the eigenvectors */ +/* > of a real symmetric or complex Hermitian matrix or for the left or */ +/* > right singular vectors of a general m-by-n matrix. The reciprocal */ +/* > condition number is the 'gap' between the corresponding eigenvalue or */ +/* > singular value and the nearest other one. */ +/* > */ +/* > The bound on the error, measured by angle in radians, in the I-th */ +/* > computed vector is given by */ +/* > */ +/* > DLAMCH( 'E' ) * ( ANORM / SEP( I ) ) */ +/* > */ +/* > where ANORM = 2-norm(A) = f2cmax( abs( D(j) ) ). SEP(I) is not allowed */ +/* > to be smaller than DLAMCH( 'E' )*ANORM in order to limit the size of */ +/* > the error bound. */ +/* > */ +/* > DDISNA may also be used to compute error bounds for eigenvectors of */ +/* > the generalized symmetric definite eigenproblem. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOB */ +/* > \verbatim */ +/* > JOB is CHARACTER*1 */ +/* > Specifies for which problem the reciprocal condition numbers */ +/* > should be computed: */ +/* > = 'E': the eigenvectors of a symmetric/Hermitian matrix; */ +/* > = 'L': the left singular vectors of a general matrix; */ +/* > = 'R': the right singular vectors of a general matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > If JOB = 'L' or 'R', the number of columns of the matrix, */ +/* > in which case N >= 0. Ignored if JOB = 'E'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (M) if JOB = 'E' */ +/* > dimension (f2cmin(M,N)) if JOB = 'L' or 'R' */ +/* > The eigenvalues (if JOB = 'E') or singular values (if JOB = */ +/* > 'L' or 'R') of the matrix, in either increasing or decreasing */ +/* > order. If singular values, they must be non-negative. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SEP */ +/* > \verbatim */ +/* > SEP is DOUBLE PRECISION array, dimension (M) if JOB = 'E' */ +/* > dimension (f2cmin(M,N)) if JOB = 'L' or 'R' */ +/* > The reciprocal condition numbers of the vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup auxOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int ddisna_(char *job, integer *m, integer *n, doublereal * + d__, doublereal *sep, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3; + + /* Local variables */ + logical decr, left, incr, sing; + integer i__, k; + logical eigen; + extern logical lsame_(char *, char *); + doublereal anorm; + logical right; + extern doublereal dlamch_(char *); + doublereal oldgap, safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal newgap, thresh, eps; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + --sep; + --d__; + + /* Function Body */ + *info = 0; + eigen = lsame_(job, "E"); + left = lsame_(job, "L"); + right = lsame_(job, "R"); + sing = left || right; + if (eigen) { + k = *m; + } else if (sing) { + k = f2cmin(*m,*n); + } + if (! eigen && ! sing) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (k < 0) { + *info = -3; + } else { + incr = TRUE_; + decr = TRUE_; + i__1 = k - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (incr) { + incr = incr && d__[i__] <= d__[i__ + 1]; + } + if (decr) { + decr = decr && d__[i__] >= d__[i__ + 1]; + } +/* L10: */ + } + if (sing && k > 0) { + if (incr) { + incr = incr && 0. <= d__[1]; + } + if (decr) { + decr = decr && d__[k] >= 0.; + } + } + if (! (incr || decr)) { + *info = -4; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DDISNA", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (k == 0) { + return 0; + } + +/* Compute reciprocal condition numbers */ + + if (k == 1) { + sep[1] = dlamch_("O"); + } else { + oldgap = (d__1 = d__[2] - d__[1], abs(d__1)); + sep[1] = oldgap; + i__1 = k - 1; + for (i__ = 2; i__ <= i__1; ++i__) { + newgap = (d__1 = d__[i__ + 1] - d__[i__], abs(d__1)); + sep[i__] = f2cmin(oldgap,newgap); + oldgap = newgap; +/* L20: */ + } + sep[k] = oldgap; + } + if (sing) { + if (left && *m > *n || right && *m < *n) { + if (incr) { + sep[1] = f2cmin(sep[1],d__[1]); + } + if (decr) { +/* Computing MIN */ + d__1 = sep[k], d__2 = d__[k]; + sep[k] = f2cmin(d__1,d__2); + } + } + } + +/* Ensure that reciprocal condition numbers are not less than */ +/* threshold, in order to limit the size of the error bound */ + + eps = dlamch_("E"); + safmin = dlamch_("S"); +/* Computing MAX */ + d__2 = abs(d__[1]), d__3 = (d__1 = d__[k], abs(d__1)); + anorm = f2cmax(d__2,d__3); + if (anorm == 0.) { + thresh = eps; + } else { +/* Computing MAX */ + d__1 = eps * anorm; + thresh = f2cmax(d__1,safmin); + } + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = sep[i__]; + sep[i__] = f2cmax(d__1,thresh); +/* L30: */ + } + + return 0; + +/* End of DDISNA */ + +} /* ddisna_ */ + diff --git a/lapack-netlib/SRC/dgbbrd.c b/lapack-netlib/SRC/dgbbrd.c new file mode 100644 index 000000000..b54d35d11 --- /dev/null +++ b/lapack-netlib/SRC/dgbbrd.c @@ -0,0 +1,1033 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGBBRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGBBRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q, */ +/* LDQ, PT, LDPT, C, LDC, WORK, INFO ) */ + +/* CHARACTER VECT */ +/* INTEGER INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC */ +/* DOUBLE PRECISION AB( LDAB, * ), C( LDC, * ), D( * ), E( * ), */ +/* $ PT( LDPT, * ), Q( LDQ, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGBBRD reduces a real general m-by-n band matrix A to upper */ +/* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */ +/* > */ +/* > The routine computes B, and optionally forms Q or P**T, or computes */ +/* > Q**T*C for a given matrix C. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] VECT */ +/* > \verbatim */ +/* > VECT is CHARACTER*1 */ +/* > Specifies whether or not the matrices Q and P**T are to be */ +/* > formed. */ +/* > = 'N': do not form Q or P**T; */ +/* > = 'Q': form Q only; */ +/* > = 'P': form P**T only; */ +/* > = 'B': form both. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NCC */ +/* > \verbatim */ +/* > NCC is INTEGER */ +/* > The number of columns of the matrix C. NCC >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals of the matrix A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals of the matrix A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is DOUBLE PRECISION array, dimension (LDAB,N) */ +/* > On entry, the m-by-n band matrix A, stored in rows 1 to */ +/* > KL+KU+1. The j-th column of A is stored in the j-th column of */ +/* > the array AB as follows: */ +/* > AB(ku+1+i-j,j) = A(i,j) for f2cmax(1,j-ku)<=i<=f2cmin(m,j+kl). */ +/* > On exit, A is overwritten by values generated during the */ +/* > reduction. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array A. LDAB >= KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The diagonal elements of the bidiagonal matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (f2cmin(M,N)-1) */ +/* > The superdiagonal elements of the bidiagonal matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is DOUBLE PRECISION array, dimension (LDQ,M) */ +/* > If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q. */ +/* > If VECT = 'N' or 'P', the array Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. */ +/* > LDQ >= f2cmax(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] PT */ +/* > \verbatim */ +/* > PT is DOUBLE PRECISION array, dimension (LDPT,N) */ +/* > If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'. */ +/* > If VECT = 'N' or 'Q', the array PT is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDPT */ +/* > \verbatim */ +/* > LDPT is INTEGER */ +/* > The leading dimension of the array PT. */ +/* > LDPT >= f2cmax(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,NCC) */ +/* > On entry, an m-by-ncc matrix C. */ +/* > On exit, C is overwritten by Q**T*C. */ +/* > C is not referenced if NCC = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. */ +/* > LDC >= f2cmax(1,M) if NCC > 0; LDC >= 1 if NCC = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (2*f2cmax(M,N)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGBcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgbbrd_(char *vect, integer *m, integer *n, integer *ncc, + integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal * + d__, doublereal *e, doublereal *q, integer *ldq, doublereal *pt, + integer *ldpt, doublereal *c__, integer *ldc, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, c_dim1, c_offset, pt_dim1, pt_offset, q_dim1, + q_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; + + /* Local variables */ + integer inca; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer i__, j, l; + extern logical lsame_(char *, char *); + logical wantb, wantc; + integer minmn; + logical wantq; + integer j1, j2, kb; + doublereal ra, rb; + integer kk; + doublereal rc; + integer ml, mn, nr, mu; + doublereal rs; + extern /* Subroutine */ int dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), xerbla_(char *, integer *, ftnlen), dlargv_( + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlartv_(integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *); + integer kb1, ml0; + logical wantpt; + integer mu0, klm, kun, nrt, klu1; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + pt_dim1 = *ldpt; + pt_offset = 1 + pt_dim1 * 1; + pt -= pt_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + --work; + + /* Function Body */ + wantb = lsame_(vect, "B"); + wantq = lsame_(vect, "Q") || wantb; + wantpt = lsame_(vect, "P") || wantb; + wantc = *ncc > 0; + klu1 = *kl + *ku + 1; + *info = 0; + if (! wantq && ! wantpt && ! lsame_(vect, "N")) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ncc < 0) { + *info = -4; + } else if (*kl < 0) { + *info = -5; + } else if (*ku < 0) { + *info = -6; + } else if (*ldab < klu1) { + *info = -8; + } else if (*ldq < 1 || wantq && *ldq < f2cmax(1,*m)) { + *info = -12; + } else if (*ldpt < 1 || wantpt && *ldpt < f2cmax(1,*n)) { + *info = -14; + } else if (*ldc < 1 || wantc && *ldc < f2cmax(1,*m)) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGBBRD", &i__1, (ftnlen)6); + return 0; + } + +/* Initialize Q and P**T to the unit matrix, if needed */ + + if (wantq) { + dlaset_("Full", m, m, &c_b8, &c_b9, &q[q_offset], ldq); + } + if (wantpt) { + dlaset_("Full", n, n, &c_b8, &c_b9, &pt[pt_offset], ldpt); + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + + minmn = f2cmin(*m,*n); + + if (*kl + *ku > 1) { + +/* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce */ +/* first to lower bidiagonal form and then transform to upper */ +/* bidiagonal */ + + if (*ku > 0) { + ml0 = 1; + mu0 = 2; + } else { + ml0 = 2; + mu0 = 1; + } + +/* Wherever possible, plane rotations are generated and applied in */ +/* vector operations of length NR over the index set J1:J2:KLU1. */ + +/* The sines of the plane rotations are stored in WORK(1:f2cmax(m,n)) */ +/* and the cosines in WORK(f2cmax(m,n)+1:2*f2cmax(m,n)). */ + + mn = f2cmax(*m,*n); +/* Computing MIN */ + i__1 = *m - 1; + klm = f2cmin(i__1,*kl); +/* Computing MIN */ + i__1 = *n - 1; + kun = f2cmin(i__1,*ku); + kb = klm + kun; + kb1 = kb + 1; + inca = kb1 * *ldab; + nr = 0; + j1 = klm + 2; + j2 = 1 - kun; + + i__1 = minmn; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Reduce i-th column and i-th row of matrix to bidiagonal form */ + + ml = klm + 1; + mu = kun + 1; + i__2 = kb; + for (kk = 1; kk <= i__2; ++kk) { + j1 += kb; + j2 += kb; + +/* generate plane rotations to annihilate nonzero elements */ +/* which have been created below the band */ + + if (nr > 0) { + dlargv_(&nr, &ab[klu1 + (j1 - klm - 1) * ab_dim1], &inca, + &work[j1], &kb1, &work[mn + j1], &kb1); + } + +/* apply plane rotations from the left */ + + i__3 = kb; + for (l = 1; l <= i__3; ++l) { + if (j2 - klm + l - 1 > *n) { + nrt = nr - 1; + } else { + nrt = nr; + } + if (nrt > 0) { + dlartv_(&nrt, &ab[klu1 - l + (j1 - klm + l - 1) * + ab_dim1], &inca, &ab[klu1 - l + 1 + (j1 - klm + + l - 1) * ab_dim1], &inca, &work[mn + j1], & + work[j1], &kb1); + } +/* L10: */ + } + + if (ml > ml0) { + if (ml <= *m - i__ + 1) { + +/* generate plane rotation to annihilate a(i+ml-1,i) */ +/* within the band, and apply rotation from the left */ + + dlartg_(&ab[*ku + ml - 1 + i__ * ab_dim1], &ab[*ku + + ml + i__ * ab_dim1], &work[mn + i__ + ml - 1], + &work[i__ + ml - 1], &ra); + ab[*ku + ml - 1 + i__ * ab_dim1] = ra; + if (i__ < *n) { +/* Computing MIN */ + i__4 = *ku + ml - 2, i__5 = *n - i__; + i__3 = f2cmin(i__4,i__5); + i__6 = *ldab - 1; + i__7 = *ldab - 1; + drot_(&i__3, &ab[*ku + ml - 2 + (i__ + 1) * + ab_dim1], &i__6, &ab[*ku + ml - 1 + (i__ + + 1) * ab_dim1], &i__7, &work[mn + i__ + + ml - 1], &work[i__ + ml - 1]); + } + } + ++nr; + j1 -= kb1; + } + + if (wantq) { + +/* accumulate product of plane rotations in Q */ + + i__3 = j2; + i__4 = kb1; + for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) + { + drot_(m, &q[(j - 1) * q_dim1 + 1], &c__1, &q[j * + q_dim1 + 1], &c__1, &work[mn + j], &work[j]); +/* L20: */ + } + } + + if (wantc) { + +/* apply plane rotations to C */ + + i__4 = j2; + i__3 = kb1; + for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) + { + drot_(ncc, &c__[j - 1 + c_dim1], ldc, &c__[j + c_dim1] + , ldc, &work[mn + j], &work[j]); +/* L30: */ + } + } + + if (j2 + kun > *n) { + +/* adjust J2 to keep within the bounds of the matrix */ + + --nr; + j2 -= kb1; + } + + i__3 = j2; + i__4 = kb1; + for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { + +/* create nonzero element a(j-1,j+ku) above the band */ +/* and store it in WORK(n+1:2*n) */ + + work[j + kun] = work[j] * ab[(j + kun) * ab_dim1 + 1]; + ab[(j + kun) * ab_dim1 + 1] = work[mn + j] * ab[(j + kun) + * ab_dim1 + 1]; +/* L40: */ + } + +/* generate plane rotations to annihilate nonzero elements */ +/* which have been generated above the band */ + + if (nr > 0) { + dlargv_(&nr, &ab[(j1 + kun - 1) * ab_dim1 + 1], &inca, & + work[j1 + kun], &kb1, &work[mn + j1 + kun], &kb1); + } + +/* apply plane rotations from the right */ + + i__4 = kb; + for (l = 1; l <= i__4; ++l) { + if (j2 + l - 1 > *m) { + nrt = nr - 1; + } else { + nrt = nr; + } + if (nrt > 0) { + dlartv_(&nrt, &ab[l + 1 + (j1 + kun - 1) * ab_dim1], & + inca, &ab[l + (j1 + kun) * ab_dim1], &inca, & + work[mn + j1 + kun], &work[j1 + kun], &kb1); + } +/* L50: */ + } + + if (ml == ml0 && mu > mu0) { + if (mu <= *n - i__ + 1) { + +/* generate plane rotation to annihilate a(i,i+mu-1) */ +/* within the band, and apply rotation from the right */ + + dlartg_(&ab[*ku - mu + 3 + (i__ + mu - 2) * ab_dim1], + &ab[*ku - mu + 2 + (i__ + mu - 1) * ab_dim1], + &work[mn + i__ + mu - 1], &work[i__ + mu - 1], + &ra); + ab[*ku - mu + 3 + (i__ + mu - 2) * ab_dim1] = ra; +/* Computing MIN */ + i__3 = *kl + mu - 2, i__5 = *m - i__; + i__4 = f2cmin(i__3,i__5); + drot_(&i__4, &ab[*ku - mu + 4 + (i__ + mu - 2) * + ab_dim1], &c__1, &ab[*ku - mu + 3 + (i__ + mu + - 1) * ab_dim1], &c__1, &work[mn + i__ + mu - + 1], &work[i__ + mu - 1]); + } + ++nr; + j1 -= kb1; + } + + if (wantpt) { + +/* accumulate product of plane rotations in P**T */ + + i__4 = j2; + i__3 = kb1; + for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) + { + drot_(n, &pt[j + kun - 1 + pt_dim1], ldpt, &pt[j + + kun + pt_dim1], ldpt, &work[mn + j + kun], & + work[j + kun]); +/* L60: */ + } + } + + if (j2 + kb > *m) { + +/* adjust J2 to keep within the bounds of the matrix */ + + --nr; + j2 -= kb1; + } + + i__3 = j2; + i__4 = kb1; + for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { + +/* create nonzero element a(j+kl+ku,j+ku-1) below the */ +/* band and store it in WORK(1:n) */ + + work[j + kb] = work[j + kun] * ab[klu1 + (j + kun) * + ab_dim1]; + ab[klu1 + (j + kun) * ab_dim1] = work[mn + j + kun] * ab[ + klu1 + (j + kun) * ab_dim1]; +/* L70: */ + } + + if (ml > ml0) { + --ml; + } else { + --mu; + } +/* L80: */ + } +/* L90: */ + } + } + + if (*ku == 0 && *kl > 0) { + +/* A has been reduced to lower bidiagonal form */ + +/* Transform lower bidiagonal form to upper bidiagonal by applying */ +/* plane rotations from the left, storing diagonal elements in D */ +/* and off-diagonal elements in E */ + +/* Computing MIN */ + i__2 = *m - 1; + i__1 = f2cmin(i__2,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&ab[i__ * ab_dim1 + 1], &ab[i__ * ab_dim1 + 2], &rc, &rs, + &ra); + d__[i__] = ra; + if (i__ < *n) { + e[i__] = rs * ab[(i__ + 1) * ab_dim1 + 1]; + ab[(i__ + 1) * ab_dim1 + 1] = rc * ab[(i__ + 1) * ab_dim1 + 1] + ; + } + if (wantq) { + drot_(m, &q[i__ * q_dim1 + 1], &c__1, &q[(i__ + 1) * q_dim1 + + 1], &c__1, &rc, &rs); + } + if (wantc) { + drot_(ncc, &c__[i__ + c_dim1], ldc, &c__[i__ + 1 + c_dim1], + ldc, &rc, &rs); + } +/* L100: */ + } + if (*m <= *n) { + d__[*m] = ab[*m * ab_dim1 + 1]; + } + } else if (*ku > 0) { + +/* A has been reduced to upper bidiagonal form */ + + if (*m < *n) { + +/* Annihilate a(m,m+1) by applying plane rotations from the */ +/* right, storing diagonal elements in D and off-diagonal */ +/* elements in E */ + + rb = ab[*ku + (*m + 1) * ab_dim1]; + for (i__ = *m; i__ >= 1; --i__) { + dlartg_(&ab[*ku + 1 + i__ * ab_dim1], &rb, &rc, &rs, &ra); + d__[i__] = ra; + if (i__ > 1) { + rb = -rs * ab[*ku + i__ * ab_dim1]; + e[i__ - 1] = rc * ab[*ku + i__ * ab_dim1]; + } + if (wantpt) { + drot_(n, &pt[i__ + pt_dim1], ldpt, &pt[*m + 1 + pt_dim1], + ldpt, &rc, &rs); + } +/* L110: */ + } + } else { + +/* Copy off-diagonal elements to E and diagonal elements to D */ + + i__1 = minmn - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] = ab[*ku + (i__ + 1) * ab_dim1]; +/* L120: */ + } + i__1 = minmn; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = ab[*ku + 1 + i__ * ab_dim1]; +/* L130: */ + } + } + } else { + +/* A is diagonal. Set elements of E to zero and copy diagonal */ +/* elements to D. */ + + i__1 = minmn - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] = 0.; +/* L140: */ + } + i__1 = minmn; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = ab[i__ * ab_dim1 + 1]; +/* L150: */ + } + } + return 0; + +/* End of DGBBRD */ + +} /* dgbbrd_ */ + diff --git a/lapack-netlib/SRC/dgbcon.c b/lapack-netlib/SRC/dgbcon.c new file mode 100644 index 000000000..13759c77f --- /dev/null +++ b/lapack-netlib/SRC/dgbcon.c @@ -0,0 +1,725 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGBCON */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGBCON + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, */ +/* WORK, IWORK, INFO ) */ + +/* CHARACTER NORM */ +/* INTEGER INFO, KL, KU, LDAB, N */ +/* DOUBLE PRECISION ANORM, RCOND */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* DOUBLE PRECISION AB( LDAB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGBCON estimates the reciprocal of the condition number of a real */ +/* > general band matrix A, in either the 1-norm or the infinity-norm, */ +/* > using the LU factorization computed by DGBTRF. */ +/* > */ +/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* > condition number is computed as */ +/* > RCOND = 1 / ( norm(A) * norm(inv(A)) ). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] NORM */ +/* > \verbatim */ +/* > NORM is CHARACTER*1 */ +/* > Specifies whether the 1-norm condition number or the */ +/* > infinity-norm condition number is required: */ +/* > = '1' or 'O': 1-norm; */ +/* > = 'I': Infinity-norm. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AB */ +/* > \verbatim */ +/* > AB is DOUBLE PRECISION array, dimension (LDAB,N) */ +/* > Details of the LU factorization of the band matrix A, as */ +/* > computed by DGBTRF. U is stored as an upper triangular band */ +/* > matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */ +/* > the multipliers used during the factorization are stored in */ +/* > rows KL+KU+2 to 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= N, row i of the matrix was */ +/* > interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is DOUBLE PRECISION */ +/* > If NORM = '1' or 'O', the 1-norm of the original matrix A. */ +/* > If NORM = 'I', the infinity-norm of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > The reciprocal of the condition number of the matrix A, */ +/* > computed as RCOND = 1/(norm(A) * norm(inv(A))). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (3*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGBcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgbcon_(char *norm, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, integer *ipiv, doublereal *anorm, + doublereal *rcond, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + integer kase; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + integer kase1, j; + doublereal t, scale; + extern logical lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int drscl_(integer *, doublereal *, doublereal *, + integer *); + logical lnoti; + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), dlacn2_(integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *); + integer kd; + extern doublereal dlamch_(char *); + integer lm, jp, ix; + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlatbs_(char *, char *, char *, char *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen); + doublereal ainvnm; + logical onenrm; + char normin[1]; + doublereal smlnum; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --ipiv; + --work; + --iwork; + + /* Function Body */ + *info = 0; + onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O"); + if (! onenrm && ! lsame_(norm, "I")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*ldab < (*kl << 1) + *ku + 1) { + *info = -6; + } else if (*anorm < 0.) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGBCON", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm == 0.) { + return 0; + } + + smlnum = dlamch_("Safe minimum"); + +/* Estimate the norm of inv(A). */ + + ainvnm = 0.; + *(unsigned char *)normin = 'N'; + if (onenrm) { + kase1 = 1; + } else { + kase1 = 2; + } + kd = *kl + *ku + 1; + lnoti = *kl > 0; + kase = 0; +L10: + dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == kase1) { + +/* Multiply by inv(L). */ + + if (lnoti) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__2 = *kl, i__3 = *n - j; + lm = f2cmin(i__2,i__3); + jp = ipiv[j]; + t = work[jp]; + if (jp != j) { + work[jp] = work[j]; + work[j] = t; + } + d__1 = -t; + daxpy_(&lm, &d__1, &ab[kd + 1 + j * ab_dim1], &c__1, & + work[j + 1], &c__1); +/* L20: */ + } + } + +/* Multiply by inv(U). */ + + i__1 = *kl + *ku; + dlatbs_("Upper", "No transpose", "Non-unit", normin, n, &i__1, & + ab[ab_offset], ldab, &work[1], &scale, &work[(*n << 1) + + 1], info); + } else { + +/* Multiply by inv(U**T). */ + + i__1 = *kl + *ku; + dlatbs_("Upper", "Transpose", "Non-unit", normin, n, &i__1, &ab[ + ab_offset], ldab, &work[1], &scale, &work[(*n << 1) + 1], + info); + +/* Multiply by inv(L**T). */ + + if (lnoti) { + for (j = *n - 1; j >= 1; --j) { +/* Computing MIN */ + i__1 = *kl, i__2 = *n - j; + lm = f2cmin(i__1,i__2); + work[j] -= ddot_(&lm, &ab[kd + 1 + j * ab_dim1], &c__1, & + work[j + 1], &c__1); + jp = ipiv[j]; + if (jp != j) { + t = work[jp]; + work[jp] = work[j]; + work[j] = t; + } +/* L30: */ + } + } + } + +/* Divide X by 1/SCALE if doing so will not cause overflow. */ + + *(unsigned char *)normin = 'Y'; + if (scale != 1.) { + ix = idamax_(n, &work[1], &c__1); + if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) + { + goto L40; + } + drscl_(n, &scale, &work[1], &c__1); + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + +L40: + return 0; + +/* End of DGBCON */ + +} /* dgbcon_ */ + diff --git a/lapack-netlib/SRC/dgbequ.c b/lapack-netlib/SRC/dgbequ.c new file mode 100644 index 000000000..f96f45574 --- /dev/null +++ b/lapack-netlib/SRC/dgbequ.c @@ -0,0 +1,764 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGBEQU */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGBEQU + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, */ +/* AMAX, INFO ) */ + +/* INTEGER INFO, KL, KU, LDAB, M, N */ +/* DOUBLE PRECISION AMAX, COLCND, ROWCND */ +/* DOUBLE PRECISION AB( LDAB, * ), C( * ), R( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGBEQU computes row and column scalings intended to equilibrate an */ +/* > M-by-N band matrix A and reduce its condition number. R returns the */ +/* > row scale factors and C the column scale factors, chosen to try to */ +/* > make the largest element in each row and column of the matrix B with */ +/* > elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. */ +/* > */ +/* > R(i) and C(j) are restricted to be between SMLNUM = smallest safe */ +/* > number and BIGNUM = largest safe number. Use of these scaling */ +/* > factors is not guaranteed to reduce the condition number of A but */ +/* > works well in practice. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AB */ +/* > \verbatim */ +/* > AB is DOUBLE PRECISION array, dimension (LDAB,N) */ +/* > The band matrix A, stored in rows 1 to KL+KU+1. The j-th */ +/* > column of A is stored in the j-th column of the array AB as */ +/* > follows: */ +/* > AB(ku+1+i-j,j) = A(i,j) for f2cmax(1,j-ku)<=i<=f2cmin(m,j+kl). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] R */ +/* > \verbatim */ +/* > R is DOUBLE PRECISION array, dimension (M) */ +/* > If INFO = 0, or INFO > M, R contains the row scale factors */ +/* > for A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, C contains the column scale factors for A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ROWCND */ +/* > \verbatim */ +/* > ROWCND is DOUBLE PRECISION */ +/* > If INFO = 0 or INFO > M, ROWCND contains the ratio of the */ +/* > smallest R(i) to the largest R(i). If ROWCND >= 0.1 and */ +/* > AMAX is neither too large nor too small, it is not worth */ +/* > scaling by R. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] COLCND */ +/* > \verbatim */ +/* > COLCND is DOUBLE PRECISION */ +/* > If INFO = 0, COLCND contains the ratio of the smallest */ +/* > C(i) to the largest C(i). If COLCND >= 0.1, it is not */ +/* > worth scaling by C. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] AMAX */ +/* > \verbatim */ +/* > AMAX is DOUBLE PRECISION */ +/* > Absolute value of largest matrix element. If AMAX is very */ +/* > close to overflow or very close to underflow, the matrix */ +/* > should be scaled. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= M: the i-th row of A is exactly zero */ +/* > > M: the (i-M)-th column of A is exactly zero */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGBcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgbequ_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * + info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j; + doublereal rcmin, rcmax; + integer kd; + extern doublereal dlamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum, smlnum; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --r__; + --c__; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*ldab < *kl + *ku + 1) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGBEQU", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + *rowcnd = 1.; + *colcnd = 1.; + *amax = 0.; + return 0; + } + +/* Get machine constants. */ + + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + +/* Compute row scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + r__[i__] = 0.; +/* L10: */ + } + +/* Find the maximum element in each row. */ + + kd = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j - *ku; +/* Computing MIN */ + i__4 = j + *kl; + i__3 = f2cmin(i__4,*m); + for (i__ = f2cmax(i__2,1); i__ <= i__3; ++i__) { +/* Computing MAX */ + d__2 = r__[i__], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], + abs(d__1)); + r__[i__] = f2cmax(d__2,d__3); +/* L20: */ + } +/* L30: */ + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[i__]; + rcmax = f2cmax(d__1,d__2); +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[i__]; + rcmin = f2cmin(d__1,d__2); +/* L40: */ + } + *amax = rcmax; + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] == 0.) { + *info = i__; + return 0; + } +/* L50: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = r__[i__]; + d__1 = f2cmax(d__2,smlnum); + r__[i__] = 1. / f2cmin(d__1,bignum); +/* L60: */ + } + +/* Compute ROWCND = f2cmin(R(I)) / f2cmax(R(I)) */ + + *rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } + +/* Compute column scale factors */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 0.; +/* L70: */ + } + +/* Find the maximum element in each column, */ +/* assuming the row scaling computed above. */ + + kd = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = j - *ku; +/* Computing MIN */ + i__4 = j + *kl; + i__2 = f2cmin(i__4,*m); + for (i__ = f2cmax(i__3,1); i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = c__[j], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs( + d__1)) * r__[i__]; + c__[j] = f2cmax(d__2,d__3); +/* L80: */ + } +/* L90: */ + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = f2cmax(d__1,d__2); +/* L100: */ + } + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (c__[j] == 0.) { + *info = *m + j; + return 0; + } +/* L110: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = c__[j]; + d__1 = f2cmax(d__2,smlnum); + c__[j] = 1. / f2cmin(d__1,bignum); +/* L120: */ + } + +/* Compute COLCND = f2cmin(C(J)) / f2cmax(C(J)) */ + + *colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } + + return 0; + +/* End of DGBEQU */ + +} /* dgbequ_ */ + diff --git a/lapack-netlib/SRC/dgbequb.c b/lapack-netlib/SRC/dgbequb.c new file mode 100644 index 000000000..906e4ed41 --- /dev/null +++ b/lapack-netlib/SRC/dgbequb.c @@ -0,0 +1,783 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGBEQUB */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGBEQUB + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGBEQUB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, */ +/* AMAX, INFO ) */ + +/* INTEGER INFO, KL, KU, LDAB, M, N */ +/* DOUBLE PRECISION AMAX, COLCND, ROWCND */ +/* DOUBLE PRECISION AB( LDAB, * ), C( * ), R( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGBEQUB computes row and column scalings intended to equilibrate an */ +/* > M-by-N matrix A and reduce its condition number. R returns the row */ +/* > scale factors and C the column scale factors, chosen to try to make */ +/* > the largest element in each row and column of the matrix B with */ +/* > elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most */ +/* > the radix. */ +/* > */ +/* > R(i) and C(j) are restricted to be a power of the radix between */ +/* > SMLNUM = smallest safe number and BIGNUM = largest safe number. Use */ +/* > of these scaling factors is not guaranteed to reduce the condition */ +/* > number of A but works well in practice. */ +/* > */ +/* > This routine differs from DGEEQU by restricting the scaling factors */ +/* > to a power of the radix. Barring over- and underflow, scaling by */ +/* > these factors introduces no additional rounding errors. However, the */ +/* > scaled entries' magnitudes are no longer approximately 1 but lie */ +/* > between sqrt(radix) and 1/sqrt(radix). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AB */ +/* > \verbatim */ +/* > AB is DOUBLE PRECISION array, dimension (LDAB,N) */ +/* > On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ +/* > The j-th column of A is stored in the j-th column of the */ +/* > array AB as follows: */ +/* > AB(KU+1+i-j,j) = A(i,j) for f2cmax(1,j-KU)<=i<=f2cmin(N,j+kl) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array A. LDAB >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] R */ +/* > \verbatim */ +/* > R is DOUBLE PRECISION array, dimension (M) */ +/* > If INFO = 0 or INFO > M, R contains the row scale factors */ +/* > for A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, C contains the column scale factors for A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ROWCND */ +/* > \verbatim */ +/* > ROWCND is DOUBLE PRECISION */ +/* > If INFO = 0 or INFO > M, ROWCND contains the ratio of the */ +/* > smallest R(i) to the largest R(i). If ROWCND >= 0.1 and */ +/* > AMAX is neither too large nor too small, it is not worth */ +/* > scaling by R. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] COLCND */ +/* > \verbatim */ +/* > COLCND is DOUBLE PRECISION */ +/* > If INFO = 0, COLCND contains the ratio of the smallest */ +/* > C(i) to the largest C(i). If COLCND >= 0.1, it is not */ +/* > worth scaling by C. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] AMAX */ +/* > \verbatim */ +/* > AMAX is DOUBLE PRECISION */ +/* > Absolute value of largest matrix element. If AMAX is very */ +/* > close to overflow or very close to underflow, the matrix */ +/* > should be scaled. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= M: the i-th row of A is exactly zero */ +/* > > M: the (i-M)-th column of A is exactly zero */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGBcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgbequb_(integer *m, integer *n, integer *kl, integer * + ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, + doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer * + info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j; + doublereal radix, rcmin, rcmax; + integer kd; + extern doublereal dlamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum, logrdx, smlnum; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --r__; + --c__; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*ldab < *kl + *ku + 1) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGBEQUB", &i__1, (ftnlen)7); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rowcnd = 1.; + *colcnd = 1.; + *amax = 0.; + return 0; + } + +/* Get machine constants. Assume SMLNUM is a power of the radix. */ + + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + radix = dlamch_("B"); + logrdx = log(radix); + +/* Compute row scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + r__[i__] = 0.; +/* L10: */ + } + +/* Find the maximum element in each row. */ + + kd = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j - *ku; +/* Computing MIN */ + i__4 = j + *kl; + i__3 = f2cmin(i__4,*m); + for (i__ = f2cmax(i__2,1); i__ <= i__3; ++i__) { +/* Computing MAX */ + d__2 = r__[i__], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], + abs(d__1)); + r__[i__] = f2cmax(d__2,d__3); +/* L20: */ + } +/* L30: */ + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] > 0.) { + i__3 = (integer) (log(r__[i__]) / logrdx); + r__[i__] = pow_di(&radix, &i__3); + } + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[i__]; + rcmax = f2cmax(d__1,d__2); +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[i__]; + rcmin = f2cmin(d__1,d__2); +/* L40: */ + } + *amax = rcmax; + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] == 0.) { + *info = i__; + return 0; + } +/* L50: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = r__[i__]; + d__1 = f2cmax(d__2,smlnum); + r__[i__] = 1. / f2cmin(d__1,bignum); +/* L60: */ + } + +/* Compute ROWCND = f2cmin(R(I)) / f2cmax(R(I)). */ + + *rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } + +/* Compute column scale factors. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 0.; +/* L70: */ + } + +/* Find the maximum element in each column, */ +/* assuming the row scaling computed above. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = j - *ku; +/* Computing MIN */ + i__4 = j + *kl; + i__2 = f2cmin(i__4,*m); + for (i__ = f2cmax(i__3,1); i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = c__[j], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs( + d__1)) * r__[i__]; + c__[j] = f2cmax(d__2,d__3); +/* L80: */ + } + if (c__[j] > 0.) { + i__2 = (integer) (log(c__[j]) / logrdx); + c__[j] = pow_di(&radix, &i__2); + } +/* L90: */ + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = f2cmax(d__1,d__2); +/* L100: */ + } + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (c__[j] == 0.) { + *info = *m + j; + return 0; + } +/* L110: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = c__[j]; + d__1 = f2cmax(d__2,smlnum); + c__[j] = 1. / f2cmin(d__1,bignum); +/* L120: */ + } + +/* Compute COLCND = f2cmin(C(J)) / f2cmax(C(J)). */ + + *colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } + + return 0; + +/* End of DGBEQUB */ + +} /* dgbequb_ */ + diff --git a/lapack-netlib/SRC/dgbrfs.c b/lapack-netlib/SRC/dgbrfs.c new file mode 100644 index 000000000..14984b885 --- /dev/null +++ b/lapack-netlib/SRC/dgbrfs.c @@ -0,0 +1,919 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGBRFS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGBRFS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, */ +/* IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), */ +/* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGBRFS improves the computed solution to a system of linear */ +/* > equations when the coefficient matrix is banded, and provides */ +/* > error bounds and backward error estimates for the solution. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AB */ +/* > \verbatim */ +/* > AB is DOUBLE PRECISION array, dimension (LDAB,N) */ +/* > The original band matrix A, stored in rows 1 to KL+KU+1. */ +/* > The j-th column of A is stored in the j-th column of the */ +/* > array AB as follows: */ +/* > AB(ku+1+i-j,j) = A(i,j) for f2cmax(1,j-ku)<=i<=f2cmin(n,j+kl). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AFB */ +/* > \verbatim */ +/* > AFB is DOUBLE PRECISION array, dimension (LDAFB,N) */ +/* > Details of the LU factorization of the band matrix A, as */ +/* > computed by DGBTRF. U is stored as an upper triangular band */ +/* > matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */ +/* > the multipliers used during the factorization are stored in */ +/* > rows KL+KU+2 to 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAFB */ +/* > \verbatim */ +/* > LDAFB is INTEGER */ +/* > The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from DGBTRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > The right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* > On entry, the solution matrix X, as computed by DGBTRS. */ +/* > On exit, the improved solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (3*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > ITMAX is the maximum number of steps of iterative refinement. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGBcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgbrfs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, + integer *ldafb, integer *ipiv, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, + x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer kase; + doublereal safe1, safe2; + integer i__, j, k; + doublereal s; + extern /* Subroutine */ int dgbmv_(char *, integer *, integer *, integer * + , integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + integer count; + extern /* Subroutine */ int dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer kk; + extern doublereal dlamch_(char *); + doublereal xk; + integer nz; + doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dgbtrs_( + char *, integer *, integer *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *); + logical notran; + char transt[1]; + doublereal lstres, eps; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1 * 1; + afb -= afb_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*nrhs < 0) { + *info = -5; + } else if (*ldab < *kl + *ku + 1) { + *info = -7; + } else if (*ldafb < (*kl << 1) + *ku + 1) { + *info = -9; + } else if (*ldb < f2cmax(1,*n)) { + *info = -12; + } else if (*ldx < f2cmax(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGBRFS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + +/* Computing MIN */ + i__1 = *kl + *ku + 2, i__2 = *n + 1; + nz = f2cmin(i__1,i__2); + eps = dlamch_("Epsilon"); + safmin = dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - op(A) * X, */ +/* where op(A) = A, A**T, or A**H, depending on TRANS. */ + + dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + dgbmv_(trans, n, n, kl, ku, &c_b15, &ab[ab_offset], ldab, &x[j * + x_dim1 + 1], &c__1, &c_b17, &work[*n + 1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* f2cmax(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); +/* L30: */ + } + +/* Compute abs(op(A))*abs(X) + abs(B). */ + + if (notran) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + kk = *ku + 1 - k; + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); +/* Computing MAX */ + i__3 = 1, i__4 = k - *ku; +/* Computing MIN */ + i__6 = *n, i__7 = k + *kl; + i__5 = f2cmin(i__6,i__7); + for (i__ = f2cmax(i__3,i__4); i__ <= i__5; ++i__) { + work[i__] += (d__1 = ab[kk + i__ + k * ab_dim1], abs(d__1) + ) * xk; +/* L40: */ + } +/* L50: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + kk = *ku + 1 - k; +/* Computing MAX */ + i__5 = 1, i__3 = k - *ku; +/* Computing MIN */ + i__6 = *n, i__7 = k + *kl; + i__4 = f2cmin(i__6,i__7); + for (i__ = f2cmax(i__5,i__3); i__ <= i__4; ++i__) { + s += (d__1 = ab[kk + i__ + k * ab_dim1], abs(d__1)) * ( + d__2 = x[i__ + j * x_dim1], abs(d__2)); +/* L60: */ + } + work[k] += s; +/* L70: */ + } + } + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ + i__]; + s = f2cmax(d__2,d__3); + } else { +/* Computing MAX */ + d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) + / (work[i__] + safe1); + s = f2cmax(d__2,d__3); + } +/* L80: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { + +/* Update solution and try again. */ + + dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1] + , &work[*n + 1], n, info); + daxpy_(n, &c_b17, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) + ; + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(op(A)))* */ +/* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(op(A)) is the inverse of op(A) */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ + +/* Use DLACN2 to estimate the infinity-norm of the matrix */ +/* inv(op(A)) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__] + safe1; + } +/* L90: */ + } + + kase = 0; +L100: + dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(op(A)**T). */ + + dgbtrs_(transt, n, kl, ku, &c__1, &afb[afb_offset], ldafb, & + ipiv[1], &work[*n + 1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] *= work[i__]; +/* L110: */ + } + } else { + +/* Multiply by inv(op(A))*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] *= work[i__]; +/* L120: */ + } + dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, & + ipiv[1], &work[*n + 1], n, info); + } + goto L100; + } + +/* Normalize error. */ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); + lstres = f2cmax(d__2,d__3); +/* L130: */ + } + if (lstres != 0.) { + ferr[j] /= lstres; + } + +/* L140: */ + } + + return 0; + +/* End of DGBRFS */ + +} /* dgbrfs_ */ + diff --git a/lapack-netlib/SRC/dgbrfsx.c b/lapack-netlib/SRC/dgbrfsx.c new file mode 100644 index 000000000..b6ff69087 --- /dev/null +++ b/lapack-netlib/SRC/dgbrfsx.c @@ -0,0 +1,1181 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGBRFSX */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGBRFSX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGBRFSX( TRANS, EQUED, N, KL, KU, NRHS, AB, LDAB, AFB, */ +/* LDAFB, IPIV, R, C, B, LDB, X, LDX, RCOND, */ +/* BERR, N_ERR_BNDS, ERR_BNDS_NORM, */ +/* ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS, EQUED */ +/* INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, KL, KU, NRHS, */ +/* $ NPARAMS, N_ERR_BNDS */ +/* DOUBLE PRECISION RCOND */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), */ +/* $ X( LDX , * ),WORK( * ) */ +/* DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), */ +/* $ ERR_BNDS_NORM( NRHS, * ), */ +/* $ ERR_BNDS_COMP( NRHS, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGBRFSX improves the computed solution to a system of linear */ +/* > equations and provides error bounds and backward error estimates */ +/* > for the solution. In addition to normwise error bound, the code */ +/* > provides maximum componentwise error bound if possible. See */ +/* > comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the */ +/* > error bounds. */ +/* > */ +/* > The original system of linear equations may have been equilibrated */ +/* > before calling this routine, as described by arguments EQUED, R */ +/* > and C below. In this case, the solution and error bounds returned */ +/* > are for the original unequilibrated system. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \verbatim */ +/* > Some optional parameters are bundled in the PARAMS array. These */ +/* > settings determine how refinement is performed, but often the */ +/* > defaults are acceptable. If the defaults are acceptable, users */ +/* > can pass NPARAMS = 0 which prevents the source code from accessing */ +/* > the PARAMS argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] EQUED */ +/* > \verbatim */ +/* > EQUED is CHARACTER*1 */ +/* > Specifies the form of equilibration that was done to A */ +/* > before calling this routine. This is needed to compute */ +/* > the solution and error bounds correctly. */ +/* > = 'N': No equilibration */ +/* > = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* > diag(R). */ +/* > = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* > by diag(C). */ +/* > = 'B': Both row and column equilibration, i.e., A has been */ +/* > replaced by diag(R) * A * diag(C). */ +/* > The right hand side B has been changed accordingly. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AB */ +/* > \verbatim */ +/* > AB is DOUBLE PRECISION array, dimension (LDAB,N) */ +/* > The original band matrix A, stored in rows 1 to KL+KU+1. */ +/* > The j-th column of A is stored in the j-th column of the */ +/* > array AB as follows: */ +/* > AB(ku+1+i-j,j) = A(i,j) for f2cmax(1,j-ku)<=i<=f2cmin(n,j+kl). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AFB */ +/* > \verbatim */ +/* > AFB is DOUBLE PRECISION array, dimension (LDAFB,N) */ +/* > Details of the LU factorization of the band matrix A, as */ +/* > computed by DGBTRF. U is stored as an upper triangular band */ +/* > matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */ +/* > the multipliers used during the factorization are stored in */ +/* > rows KL+KU+2 to 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAFB */ +/* > \verbatim */ +/* > LDAFB is INTEGER */ +/* > The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from DGETRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] R */ +/* > \verbatim */ +/* > R is DOUBLE PRECISION array, dimension (N) */ +/* > The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* > multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* > is not accessed. R is an input argument if FACT = 'F'; */ +/* > otherwise, R is an output argument. If FACT = 'F' and */ +/* > EQUED = 'R' or 'B', each element of R must be positive. */ +/* > If R is output, each element of R is a power of the radix. */ +/* > If R is input, each element of R should be a power of the radix */ +/* > to ensure a reliable solution and error estimates. Scaling by */ +/* > powers of the radix does not cause rounding errors unless the */ +/* > result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (N) */ +/* > The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* > multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* > is not accessed. C is an input argument if FACT = 'F'; */ +/* > otherwise, C is an output argument. If FACT = 'F' and */ +/* > EQUED = 'C' or 'B', each element of C must be positive. */ +/* > If C is output, each element of C is a power of the radix. */ +/* > If C is input, each element of C should be a power of the radix */ +/* > to ensure a reliable solution and error estimates. Scaling by */ +/* > powers of the radix does not cause rounding errors unless the */ +/* > result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > The right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* > On entry, the solution matrix X, as computed by DGETRS. */ +/* > On exit, the improved solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > Reciprocal scaled condition number. This is an estimate of the */ +/* > reciprocal Skeel condition number of the matrix A after */ +/* > equilibration (if done). If this is less than the machine */ +/* > precision (in particular, if it is zero), the matrix is singular */ +/* > to working precision. Note that the error may still be small even */ +/* > if this number is very small and the matrix appears ill- */ +/* > conditioned. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > Componentwise relative backward error. This is the */ +/* > componentwise relative backward error of each solution vector X(j) */ +/* > (i.e., the smallest relative change in any element of A or B that */ +/* > makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N_ERR_BNDS */ +/* > \verbatim */ +/* > N_ERR_BNDS is INTEGER */ +/* > Number of error bounds to return for each right hand side */ +/* > and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* > ERR_BNDS_COMP below. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_NORM */ +/* > \verbatim */ +/* > ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > normwise relative error, which is defined as follows: */ +/* > */ +/* > Normwise relative error in the ith solution vector: */ +/* > max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* > ------------------------------ */ +/* > max_j abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the type of error information as described */ +/* > below. There currently are up to three pieces of information */ +/* > returned. */ +/* > */ +/* > The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated normwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*A, where S scales each row by a power of the */ +/* > radix so all absolute row sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_COMP */ +/* > \verbatim */ +/* > ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > componentwise relative error, which is defined as follows: */ +/* > */ +/* > Componentwise relative error in the ith solution vector: */ +/* > abs(XTRUE(j,i) - X(j,i)) */ +/* > max_j ---------------------- */ +/* > abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the right-hand side i (on which the */ +/* > componentwise relative error depends), and the type of error */ +/* > information as described below. There currently are up to three */ +/* > pieces of information returned for each right-hand side. If */ +/* > componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* > ERR_BNDS_COMP is not accessed. If N_ERR_BNDS < 3, then at most */ +/* > the first (:,N_ERR_BNDS) entries are returned. */ +/* > */ +/* > The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated componentwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*(A*diag(x)), where x is the solution for the */ +/* > current right-hand side and S scales each row of */ +/* > A*diag(x) by a power of the radix so all absolute row */ +/* > sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NPARAMS */ +/* > \verbatim */ +/* > NPARAMS is INTEGER */ +/* > Specifies the number of parameters set in PARAMS. If <= 0, the */ +/* > PARAMS array is never referenced and default values are used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] PARAMS */ +/* > \verbatim */ +/* > PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) */ +/* > Specifies algorithm parameters. If an entry is < 0.0, then */ +/* > that entry will be filled with default value used for that */ +/* > parameter. Only positions up to NPARAMS are accessed; defaults */ +/* > are used for higher-numbered parameters. */ +/* > */ +/* > PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* > refinement or not. */ +/* > Default: 1.0D+0 */ +/* > = 0.0: No refinement is performed, and no error bounds are */ +/* > computed. */ +/* > = 1.0: Use the double-precision refinement algorithm, */ +/* > possibly with doubled-single computations if the */ +/* > compilation environment does not support DOUBLE */ +/* > PRECISION. */ +/* > (other values are reserved for future use) */ +/* > */ +/* > PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* > computations allowed for refinement. */ +/* > Default: 10 */ +/* > Aggressive: Set to 100 to permit convergence using approximate */ +/* > factorizations or factorizations other than LU. If */ +/* > the factorization uses a technique other than */ +/* > Gaussian elimination, the guarantees in */ +/* > err_bnds_norm and err_bnds_comp may no longer be */ +/* > trustworthy. */ +/* > */ +/* > PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* > will attempt to find a solution with small componentwise */ +/* > relative error in the double-precision algorithm. Positive */ +/* > is true, 0.0 is false. */ +/* > Default: 1.0 (attempt componentwise convergence) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (4*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: Successful exit. The solution to every right-hand side is */ +/* > guaranteed. */ +/* > < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly singular, so */ +/* > the solution and error bounds could not be computed. RCOND = 0 */ +/* > is returned. */ +/* > = N+J: The solution corresponding to the Jth right-hand side is */ +/* > not guaranteed. The solutions corresponding to other right- */ +/* > hand sides K with K > J may not be guaranteed as well, but */ +/* > only the first such right-hand side is reported. If a small */ +/* > componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* > the Jth right-hand side is the first with a normwise error */ +/* > bound that is not guaranteed (the smallest J such */ +/* > that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ +/* > the Jth right-hand side is the first with either a normwise or */ +/* > componentwise error bound that is not guaranteed (the smallest */ +/* > J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* > ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* > ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* > about all of the right-hand sides check ERR_BNDS_NORM or */ +/* > ERR_BNDS_COMP. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup doubleGBcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgbrfsx_(char *trans, char *equed, integer *n, integer * + kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, + doublereal *afb, integer *ldafb, integer *ipiv, doublereal *r__, + doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer * + ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, + x_dim1, x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + doublereal illrcond_thresh__, unstable_thresh__, err_lbnd__; + char norm[1]; + integer ref_type__; + extern integer ilatrans_(char *); + logical ignore_cwise__; + integer j; + extern logical lsame_(char *, char *); + doublereal anorm, rcond_tmp__; + integer prec_type__; + extern doublereal dlamch_(char *), dlangb_(char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dgbcon_(char *, integer *, integer *, integer + *, doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *), xerbla_(char *, + integer *, ftnlen); + logical colequ, notran, rowequ; + integer trans_type__; + extern doublereal dla_gbrcond_(char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *); + extern integer ilaprec_(char *); + integer ithresh, n_norms__; + doublereal rthresh, cwise_wrong__; + extern /* Subroutine */ int dla_gbrfsx_extended_(integer *, integer *, + integer *, integer *, integer *, integer *, doublereal *, integer + *, doublereal *, integer *, integer *, logical *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, doublereal * + , doublereal *, logical *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ================================================================== */ + + +/* Check the input parameters. */ + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1; + err_bnds_norm__ -= err_bnds_norm_offset; + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1 * 1; + afb -= afb_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --berr; + --params; + --work; + --iwork; + + /* Function Body */ + *info = 0; + trans_type__ = ilatrans_(trans); + ref_type__ = 1; + if (*nparams >= 1) { + if (params[1] < 0.) { + params[1] = 1.; + } else { + ref_type__ = (integer) params[1]; + } + } + +/* Set default parameters. */ + + illrcond_thresh__ = (doublereal) (*n) * dlamch_("Epsilon"); + ithresh = 10; + rthresh = .5; + unstable_thresh__ = .25; + ignore_cwise__ = FALSE_; + + if (*nparams >= 2) { + if (params[2] < 0.) { + params[2] = (doublereal) ithresh; + } else { + ithresh = (integer) params[2]; + } + } + if (*nparams >= 3) { + if (params[3] < 0.) { + if (ignore_cwise__) { + params[3] = 0.; + } else { + params[3] = 1.; + } + } else { + ignore_cwise__ = params[3] == 0.; + } + } + if (ref_type__ == 0 || *n_err_bnds__ == 0) { + n_norms__ = 0; + } else if (ignore_cwise__) { + n_norms__ = 1; + } else { + n_norms__ = 2; + } + + notran = lsame_(trans, "N"); + rowequ = lsame_(equed, "R") || lsame_(equed, "B"); + colequ = lsame_(equed, "C") || lsame_(equed, "B"); + +/* Test input parameters. */ + + if (trans_type__ == -1) { + *info = -1; + } else if (! rowequ && ! colequ && ! lsame_(equed, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kl < 0) { + *info = -4; + } else if (*ku < 0) { + *info = -5; + } else if (*nrhs < 0) { + *info = -6; + } else if (*ldab < *kl + *ku + 1) { + *info = -8; + } else if (*ldafb < (*kl << 1) + *ku + 1) { + *info = -10; + } else if (*ldb < f2cmax(1,*n)) { + *info = -13; + } else if (*ldx < f2cmax(1,*n)) { + *info = -15; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGBRFSX", &i__1, (ftnlen)7); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *nrhs == 0) { + *rcond = 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + berr[j] = 0.; + if (*n_err_bnds__ >= 1) { + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } + if (*n_err_bnds__ >= 2) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 0.; + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 0.; + } + if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 1.; + } + } + return 0; + } + +/* Default to failure. */ + + *rcond = 0.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + berr[j] = 1.; + if (*n_err_bnds__ >= 1) { + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } + if (*n_err_bnds__ >= 2) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + } + if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 0.; + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 0.; + } + } + +/* Compute the norm of A and the reciprocal of the condition */ +/* number of A. */ + + if (notran) { + *(unsigned char *)norm = 'I'; + } else { + *(unsigned char *)norm = '1'; + } + anorm = dlangb_(norm, n, kl, ku, &ab[ab_offset], ldab, &work[1]); + dgbcon_(norm, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], &anorm, rcond, + &work[1], &iwork[1], info); + +/* Perform refinement on each right-hand side */ + + if (ref_type__ != 0 && *info == 0) { + prec_type__ = ilaprec_("E"); + if (notran) { + dla_gbrfsx_extended_(&prec_type__, &trans_type__, n, kl, ku, + nrhs, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, & + ipiv[1], &colequ, &c__[1], &b[b_offset], ldb, &x[x_offset] + , ldx, &berr[1], &n_norms__, &err_bnds_norm__[ + err_bnds_norm_offset], &err_bnds_comp__[ + err_bnds_comp_offset], &work[*n + 1], &work[1], &work[(*n + << 1) + 1], &work[1], rcond, &ithresh, &rthresh, & + unstable_thresh__, &ignore_cwise__, info); + } else { + dla_gbrfsx_extended_(&prec_type__, &trans_type__, n, kl, ku, + nrhs, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, & + ipiv[1], &rowequ, &r__[1], &b[b_offset], ldb, &x[x_offset] + , ldx, &berr[1], &n_norms__, &err_bnds_norm__[ + err_bnds_norm_offset], &err_bnds_comp__[ + err_bnds_comp_offset], &work[*n + 1], &work[1], &work[(*n + << 1) + 1], &work[1], rcond, &ithresh, &rthresh, & + unstable_thresh__, &ignore_cwise__, info); + } + } +/* Computing MAX */ + d__1 = 10., d__2 = sqrt((doublereal) (*n)); + err_lbnd__ = f2cmax(d__1,d__2) * dlamch_("Epsilon"); + if (*n_err_bnds__ >= 1 && n_norms__ >= 1) { + +/* Compute scaled normwise condition number cond(A*C). */ + + if (colequ && notran) { + rcond_tmp__ = dla_gbrcond_(trans, n, kl, ku, &ab[ab_offset], + ldab, &afb[afb_offset], ldafb, &ipiv[1], &c_n1, &c__[1], + info, &work[1], &iwork[1]); + } else if (rowequ && ! notran) { + rcond_tmp__ = dla_gbrcond_(trans, n, kl, ku, &ab[ab_offset], + ldab, &afb[afb_offset], ldafb, &ipiv[1], &c_n1, &r__[1], + info, &work[1], &iwork[1]); + } else { + rcond_tmp__ = dla_gbrcond_(trans, n, kl, ku, &ab[ab_offset], + ldab, &afb[afb_offset], ldafb, &ipiv[1], &c__0, &r__[1], + info, &work[1], &iwork[1]); + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Cap the error at 1.0. */ + + if (*n_err_bnds__ >= 2 && err_bnds_norm__[j + (err_bnds_norm_dim1 + << 1)] > 1.) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + } + +/* Threshold the error (see LAWN). */ + + if (rcond_tmp__ < illrcond_thresh__) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + err_bnds_norm__[j + err_bnds_norm_dim1] = 0.; + if (*info <= *n) { + *info = *n + j; + } + } else if (err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] < + err_lbnd__) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = err_lbnd__; + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + } + +/* Save the condition number. */ + + if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = rcond_tmp__; + } + } + } + if (*n_err_bnds__ >= 1 && n_norms__ >= 2) { + +/* Compute componentwise condition number cond(A*diag(Y(:,J))) for */ +/* each right-hand side using the current solution as an estimate of */ +/* the true solution. If the componentwise error estimate is too */ +/* large, then the solution is a lousy estimate of truth and the */ +/* estimated RCOND may be too optimistic. To avoid misleading users, */ +/* the inverse condition number is set to 0.0 when the estimated */ +/* cwise error is at least CWISE_WRONG. */ + + cwise_wrong__ = sqrt(dlamch_("Epsilon")); + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < + cwise_wrong__) { + rcond_tmp__ = dla_gbrcond_(trans, n, kl, ku, &ab[ab_offset], + ldab, &afb[afb_offset], ldafb, &ipiv[1], &c__1, &x[j * + x_dim1 + 1], info, &work[1], &iwork[1]); + } else { + rcond_tmp__ = 0.; + } + +/* Cap the error at 1.0. */ + + if (*n_err_bnds__ >= 2 && err_bnds_comp__[j + (err_bnds_comp_dim1 + << 1)] > 1.) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + } + +/* Threshold the error (see LAWN). */ + + if (rcond_tmp__ < illrcond_thresh__) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 0.; + if (params[3] == 1. && *info < *n + j) { + *info = *n + j; + } + } else if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < + err_lbnd__) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = err_lbnd__; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } + +/* Save the condition number. */ + + if (*n_err_bnds__ >= 3) { + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = rcond_tmp__; + } + } + } + + return 0; + +/* End of DGBRFSX */ + +} /* dgbrfsx_ */ + diff --git a/lapack-netlib/SRC/dgbsv.c b/lapack-netlib/SRC/dgbsv.c new file mode 100644 index 000000000..5668af30f --- /dev/null +++ b/lapack-netlib/SRC/dgbsv.c @@ -0,0 +1,622 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGBSV computes the solution to system of linear equations A * X = B for GB matrices (simpl +e driver) */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGBSV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) */ + +/* INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGBSV computes the solution to a real system of linear equations */ +/* > A * X = B, where A is a band matrix of order N with KL subdiagonals */ +/* > and KU superdiagonals, and X and B are N-by-NRHS matrices. */ +/* > */ +/* > The LU decomposition with partial pivoting and row interchanges is */ +/* > used to factor A as A = L * U, where L is a product of permutation */ +/* > and unit lower triangular matrices with KL subdiagonals, and U is */ +/* > upper triangular with KL+KU superdiagonals. The factored form of A */ +/* > is then used to solve the system of equations A * X = B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is DOUBLE PRECISION array, dimension (LDAB,N) */ +/* > On entry, the matrix A in band storage, in rows KL+1 to */ +/* > 2*KL+KU+1; rows 1 to KL of the array need not be set. */ +/* > The j-th column of A is stored in the j-th column of the */ +/* > array AB as follows: */ +/* > AB(KL+KU+1+i-j,j) = A(i,j) for f2cmax(1,j-KU)<=i<=f2cmin(N,j+KL) */ +/* > On exit, details of the factorization: U is stored as an */ +/* > upper triangular band matrix with KL+KU superdiagonals in */ +/* > rows 1 to KL+KU+1, and the multipliers used during the */ +/* > factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */ +/* > See below for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices that define the permutation matrix P; */ +/* > row i of the matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and the solution has not been computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGBsolve */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The band storage scheme is illustrated by the following example, when */ +/* > M = N = 6, KL = 2, KU = 1: */ +/* > */ +/* > On entry: On exit: */ +/* > */ +/* > * * * + + + * * * u14 u25 u36 */ +/* > * * + + + + * * u13 u24 u35 u46 */ +/* > * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ +/* > a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ +/* > a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * */ +/* > a31 a42 a53 a64 * * m31 m42 m53 m64 * * */ +/* > */ +/* > Array elements marked * are not used by the routine; elements marked */ +/* > + need not be set on entry, but are required by the routine to store */ +/* > elements of U because of fill-in resulting from the row interchanges. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgbsv_(integer *n, integer *kl, integer *ku, integer * + nrhs, doublereal *ab, integer *ldab, integer *ipiv, doublereal *b, + integer *ldb, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int dgbtrf_(integer *, integer *, integer *, + integer *, doublereal *, integer *, integer *, integer *), + xerbla_(char *, integer *, ftnlen), dgbtrs_(char *, integer *, + integer *, integer *, integer *, doublereal *, integer *, integer + *, doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*kl < 0) { + *info = -2; + } else if (*ku < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*ldab < (*kl << 1) + *ku + 1) { + *info = -6; + } else if (*ldb < f2cmax(*n,1)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGBSV ", &i__1, (ftnlen)6); + return 0; + } + +/* Compute the LU factorization of the band matrix A. */ + + dgbtrf_(n, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + dgbtrs_("No transpose", n, kl, ku, nrhs, &ab[ab_offset], ldab, &ipiv[ + 1], &b[b_offset], ldb, info); + } + return 0; + +/* End of DGBSV */ + +} /* dgbsv_ */ + diff --git a/lapack-netlib/SRC/dgbsvx.c b/lapack-netlib/SRC/dgbsvx.c new file mode 100644 index 000000000..370e0670a --- /dev/null +++ b/lapack-netlib/SRC/dgbsvx.c @@ -0,0 +1,1138 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGBSVX computes the solution to system of linear equations A * X = B for GB matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGBSVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, */ +/* LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, */ +/* RCOND, FERR, BERR, WORK, IWORK, INFO ) */ + +/* CHARACTER EQUED, FACT, TRANS */ +/* INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS */ +/* DOUBLE PRECISION RCOND */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), */ +/* $ BERR( * ), C( * ), FERR( * ), R( * ), */ +/* $ WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGBSVX uses the LU factorization to compute the solution to a real */ +/* > system of linear equations A * X = B, A**T * X = B, or A**H * X = B, */ +/* > where A is a band matrix of order N with KL subdiagonals and KU */ +/* > superdiagonals, and X and B are N-by-NRHS matrices. */ +/* > */ +/* > Error bounds on the solution and a condition estimate are also */ +/* > provided. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed by this subroutine: */ +/* > */ +/* > 1. If FACT = 'E', real scaling factors are computed to equilibrate */ +/* > the system: */ +/* > TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B */ +/* > TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */ +/* > TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */ +/* > Whether or not the system will be equilibrated depends on the */ +/* > scaling of the matrix A, but if equilibration is used, A is */ +/* > overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */ +/* > or diag(C)*B (if TRANS = 'T' or 'C'). */ +/* > */ +/* > 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the */ +/* > matrix A (after equilibration if FACT = 'E') as */ +/* > A = L * U, */ +/* > where L is a product of permutation and unit lower triangular */ +/* > matrices with KL subdiagonals, and U is upper triangular with */ +/* > KL+KU superdiagonals. */ +/* > */ +/* > 3. If some U(i,i)=0, so that U is exactly singular, then the routine */ +/* > returns with INFO = i. Otherwise, the factored form of A is used */ +/* > to estimate the condition number of the matrix A. If the */ +/* > reciprocal of the condition number is less than machine precision, */ +/* > INFO = N+1 is returned as a warning, but the routine still goes on */ +/* > to solve for X and compute error bounds as described below. */ +/* > */ +/* > 4. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 5. Iterative refinement is applied to improve the computed solution */ +/* > matrix and calculate error bounds and backward error estimates */ +/* > for it. */ +/* > */ +/* > 6. If equilibration was used, the matrix X is premultiplied by */ +/* > diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */ +/* > that it solves the original system before equilibration. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of the matrix A is */ +/* > supplied on entry, and if not, whether the matrix A should be */ +/* > equilibrated before it is factored. */ +/* > = 'F': On entry, AFB and IPIV contain the factored form of */ +/* > A. If EQUED is not 'N', the matrix A has been */ +/* > equilibrated with scaling factors given by R and C. */ +/* > AB, AFB, and IPIV are not modified. */ +/* > = 'N': The matrix A will be copied to AFB and factored. */ +/* > = 'E': The matrix A will be equilibrated if necessary, then */ +/* > copied to AFB and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations. */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is DOUBLE PRECISION array, dimension (LDAB,N) */ +/* > On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ +/* > The j-th column of A is stored in the j-th column of the */ +/* > array AB as follows: */ +/* > AB(KU+1+i-j,j) = A(i,j) for f2cmax(1,j-KU)<=i<=f2cmin(N,j+kl) */ +/* > */ +/* > If FACT = 'F' and EQUED is not 'N', then A must have been */ +/* > equilibrated by the scaling factors in R and/or C. AB is not */ +/* > modified if FACT = 'F' or 'N', or if FACT = 'E' and */ +/* > EQUED = 'N' on exit. */ +/* > */ +/* > On exit, if EQUED .ne. 'N', A is scaled as follows: */ +/* > EQUED = 'R': A := diag(R) * A */ +/* > EQUED = 'C': A := A * diag(C) */ +/* > EQUED = 'B': A := diag(R) * A * diag(C). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AFB */ +/* > \verbatim */ +/* > AFB is DOUBLE PRECISION array, dimension (LDAFB,N) */ +/* > If FACT = 'F', then AFB is an input argument and on entry */ +/* > contains details of the LU factorization of the band matrix */ +/* > A, as computed by DGBTRF. U is stored as an upper triangular */ +/* > band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */ +/* > and the multipliers used during the factorization are stored */ +/* > in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is */ +/* > the factored form of the equilibrated matrix A. */ +/* > */ +/* > If FACT = 'N', then AFB is an output argument and on exit */ +/* > returns details of the LU factorization of A. */ +/* > */ +/* > If FACT = 'E', then AFB is an output argument and on exit */ +/* > returns details of the LU factorization of the equilibrated */ +/* > matrix A (see the description of AB for the form of the */ +/* > equilibrated matrix). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAFB */ +/* > \verbatim */ +/* > LDAFB is INTEGER */ +/* > The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains the pivot indices from the factorization A = L*U */ +/* > as computed by DGBTRF; row i of the matrix was interchanged */ +/* > with row IPIV(i). */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = L*U */ +/* > of the equilibrated matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] EQUED */ +/* > \verbatim */ +/* > EQUED is CHARACTER*1 */ +/* > Specifies the form of equilibration that was done. */ +/* > = 'N': No equilibration (always true if FACT = 'N'). */ +/* > = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* > diag(R). */ +/* > = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* > by diag(C). */ +/* > = 'B': Both row and column equilibration, i.e., A has been */ +/* > replaced by diag(R) * A * diag(C). */ +/* > EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* > output argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] R */ +/* > \verbatim */ +/* > R is DOUBLE PRECISION array, dimension (N) */ +/* > The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* > multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* > is not accessed. R is an input argument if FACT = 'F'; */ +/* > otherwise, R is an output argument. If FACT = 'F' and */ +/* > EQUED = 'R' or 'B', each element of R must be positive. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (N) */ +/* > The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* > multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* > is not accessed. C is an input argument if FACT = 'F'; */ +/* > otherwise, C is an output argument. If FACT = 'F' and */ +/* > EQUED = 'C' or 'B', each element of C must be positive. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the right hand side matrix B. */ +/* > On exit, */ +/* > if EQUED = 'N', B is not modified; */ +/* > if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ +/* > diag(R)*B; */ +/* > if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ +/* > overwritten by diag(C)*B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* > If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X */ +/* > to the original system of equations. Note that A and B are */ +/* > modified on exit if EQUED .ne. 'N', and the solution to the */ +/* > equilibrated system is inv(diag(C))*X if TRANS = 'N' and */ +/* > EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' */ +/* > and EQUED = 'R' or 'B'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > The estimate of the reciprocal condition number of the matrix */ +/* > A after equilibration (if done). If RCOND is less than the */ +/* > machine precision (in particular, if RCOND = 0), the matrix */ +/* > is singular to working precision. This condition is */ +/* > indicated by a return code of INFO > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (3*N) */ +/* > On exit, WORK(1) contains the reciprocal pivot growth */ +/* > factor norm(A)/norm(U). The "f2cmax absolute element" norm is */ +/* > used. If WORK(1) is much less than 1, then the stability */ +/* > of the LU factorization of the (equilibrated) matrix A */ +/* > could be poor. This also means that the solution X, condition */ +/* > estimator RCOND, and forward error bound FERR could be */ +/* > unreliable. If factorization fails with 0 WORK(1) contains the reciprocal pivot growth factor for the */ +/* > leading INFO columns of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= N: U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, so the solution and error bounds */ +/* > could not be computed. RCOND = 0 is returned. */ +/* > = N+1: U is nonsingular, but RCOND is less than machine */ +/* > precision, meaning that the matrix is singular */ +/* > to working precision. Nevertheless, the */ +/* > solution and error bounds are computed because */ +/* > there are a number of situations where the */ +/* > computed solution can be more accurate than the */ +/* > value of RCOND would suggest. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup doubleGBsolve */ + +/* ===================================================================== */ +/* Subroutine */ int dgbsvx_(char *fact, char *trans, integer *n, integer *kl, + integer *ku, integer *nrhs, doublereal *ab, integer *ldab, + doublereal *afb, integer *ldafb, integer *ipiv, char *equed, + doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, + doublereal *berr, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, + x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3; + + /* Local variables */ + doublereal amax; + char norm[1]; + integer i__, j; + extern logical lsame_(char *, char *); + doublereal rcmin, rcmax, anorm; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical equil; + integer j1, j2; + extern doublereal dlamch_(char *), dlangb_(char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dlaqgb_(integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, char *), + dgbcon_(char *, integer *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *); + doublereal colcnd; + extern doublereal dlantb_(char *, char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dgbequ_(integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *), dgbrfs_( + char *, integer *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *), dgbtrf_(integer *, + integer *, integer *, integer *, doublereal *, integer *, integer + *, integer *); + logical nofact; + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern /* Subroutine */ int dgbtrs_(char *, integer *, integer *, integer + *, integer *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + integer infequ; + logical colequ; + doublereal rowcnd; + logical notran; + doublereal smlnum; + logical rowequ; + doublereal rpvgrw; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1 * 1; + afb -= afb_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + equil = lsame_(fact, "E"); + notran = lsame_(trans, "N"); + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE_; + colequ = FALSE_; + } else { + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + smlnum = dlamch_("Safe minimum"); + bignum = 1. / smlnum; + } + +/* Test the input parameters. */ + + if (! nofact && ! equil && ! lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kl < 0) { + *info = -4; + } else if (*ku < 0) { + *info = -5; + } else if (*nrhs < 0) { + *info = -6; + } else if (*ldab < *kl + *ku + 1) { + *info = -8; + } else if (*ldafb < (*kl << 1) + *ku + 1) { + *info = -10; + } else if (lsame_(fact, "F") && ! (rowequ || colequ + || lsame_(equed, "N"))) { + *info = -12; + } else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[j]; + rcmax = f2cmax(d__1,d__2); +/* L10: */ + } + if (rcmin <= 0.) { + *info = -13; + } else if (*n > 0) { + rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + rowcnd = 1.; + } + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = f2cmax(d__1,d__2); +/* L20: */ + } + if (rcmin <= 0.) { + *info = -14; + } else if (*n > 0) { + colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + colcnd = 1.; + } + } + if (*info == 0) { + if (*ldb < f2cmax(1,*n)) { + *info = -16; + } else if (*ldx < f2cmax(1,*n)) { + *info = -18; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGBSVX", &i__1, (ftnlen)6); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + dgbequ_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], &rowcnd, + &colcnd, &amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + dlaqgb_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], & + rowcnd, &colcnd, &amax, equed); + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + } + } + +/* Scale the right hand side. */ + + if (notran) { + if (rowequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = r__[i__] * b[i__ + j * b_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else if (colequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = c__[i__] * b[i__ + j * b_dim1]; +/* L50: */ + } +/* L60: */ + } + } + + if (nofact || equil) { + +/* Compute the LU factorization of the band matrix A. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j - *ku; + j1 = f2cmax(i__2,1); +/* Computing MIN */ + i__2 = j + *kl; + j2 = f2cmin(i__2,*n); + i__2 = j2 - j1 + 1; + dcopy_(&i__2, &ab[*ku + 1 - j + j1 + j * ab_dim1], &c__1, &afb[* + kl + *ku + 1 - j + j1 + j * afb_dim1], &c__1); +/* L70: */ + } + + dgbtrf_(n, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. */ + + anorm = 0.; + i__1 = *info; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = *ku + 2 - j; +/* Computing MIN */ + i__4 = *n + *ku + 1 - j, i__5 = *kl + *ku + 1; + i__3 = f2cmin(i__4,i__5); + for (i__ = f2cmax(i__2,1); i__ <= i__3; ++i__) { +/* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = ab[i__ + j * ab_dim1], abs( + d__1)); + anorm = f2cmax(d__2,d__3); +/* L80: */ + } +/* L90: */ + } +/* Computing MIN */ + i__3 = *info - 1, i__2 = *kl + *ku; + i__1 = f2cmin(i__3,i__2); +/* Computing MAX */ + i__4 = 1, i__5 = *kl + *ku + 2 - *info; + rpvgrw = dlantb_("M", "U", "N", info, &i__1, &afb[f2cmax(i__4,i__5) + + afb_dim1], ldafb, &work[1]); + if (rpvgrw == 0.) { + rpvgrw = 1.; + } else { + rpvgrw = anorm / rpvgrw; + } + work[1] = rpvgrw; + *rcond = 0.; + return 0; + } + } + +/* Compute the norm of the matrix A and the */ +/* reciprocal pivot growth factor RPVGRW. */ + + if (notran) { + *(unsigned char *)norm = '1'; + } else { + *(unsigned char *)norm = 'I'; + } + anorm = dlangb_(norm, n, kl, ku, &ab[ab_offset], ldab, &work[1]); + i__1 = *kl + *ku; + rpvgrw = dlantb_("M", "U", "N", n, &i__1, &afb[afb_offset], ldafb, &work[ + 1]); + if (rpvgrw == 0.) { + rpvgrw = 1.; + } else { + rpvgrw = dlangb_("M", n, kl, ku, &ab[ab_offset], ldab, &work[1]) / rpvgrw; + } + +/* Compute the reciprocal of the condition number of A. */ + + dgbcon_(norm, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], &anorm, rcond, + &work[1], &iwork[1], info); + +/* Compute the solution matrix X. */ + + dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + dgbtrs_(trans, n, kl, ku, nrhs, &afb[afb_offset], ldafb, &ipiv[1], &x[ + x_offset], ldx, info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + dgbrfs_(trans, n, kl, ku, nrhs, &ab[ab_offset], ldab, &afb[afb_offset], + ldafb, &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], & + berr[1], &work[1], &iwork[1], info); + +/* Transform the solution matrix X to a solution of the original */ +/* system. */ + + if (notran) { + if (colequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + x[i__ + j * x_dim1] = c__[i__] * x[i__ + j * x_dim1]; +/* L100: */ + } +/* L110: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= colcnd; +/* L120: */ + } + } + } else if (rowequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + x[i__ + j * x_dim1] = r__[i__] * x[i__ + j * x_dim1]; +/* L130: */ + } +/* L140: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= rowcnd; +/* L150: */ + } + } + +/* Set INFO = N+1 if the matrix is singular to working precision. */ + + if (*rcond < dlamch_("Epsilon")) { + *info = *n + 1; + } + + work[1] = rpvgrw; + return 0; + +/* End of DGBSVX */ + +} /* dgbsvx_ */ + diff --git a/lapack-netlib/SRC/dgbsvxx.c b/lapack-netlib/SRC/dgbsvxx.c new file mode 100644 index 000000000..c737f112e --- /dev/null +++ b/lapack-netlib/SRC/dgbsvxx.c @@ -0,0 +1,1249 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGBSVXX computes the solution to system of linear equations A * X = B for GB matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGBSVXX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGBSVXX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, */ +/* LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, */ +/* RCOND, RPVGRW, BERR, N_ERR_BNDS, */ +/* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, */ +/* WORK, IWORK, INFO ) */ + +/* CHARACTER EQUED, FACT, TRANS */ +/* INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, NRHS, NPARAMS, */ +/* $ N_ERR_BNDS, KL, KU */ +/* DOUBLE PRECISION RCOND, RPVGRW */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), */ +/* $ X( LDX , * ),WORK( * ) */ +/* DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), */ +/* $ ERR_BNDS_NORM( NRHS, * ), */ +/* $ ERR_BNDS_COMP( NRHS, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGBSVXX uses the LU factorization to compute the solution to a */ +/* > double precision system of linear equations A * X = B, where A is an */ +/* > N-by-N matrix and X and B are N-by-NRHS matrices. */ +/* > */ +/* > If requested, both normwise and maximum componentwise error bounds */ +/* > are returned. DGBSVXX will return a solution with a tiny */ +/* > guaranteed error (O(eps) where eps is the working machine */ +/* > precision) unless the matrix is very ill-conditioned, in which */ +/* > case a warning is returned. Relevant condition numbers also are */ +/* > calculated and returned. */ +/* > */ +/* > DGBSVXX accepts user-provided factorizations and equilibration */ +/* > factors; see the definitions of the FACT and EQUED options. */ +/* > Solving with refinement and using a factorization from a previous */ +/* > DGBSVXX call will also produce a solution with either O(eps) */ +/* > errors or warnings, but we cannot make that claim for general */ +/* > user-provided factorizations and equilibration factors if they */ +/* > differ from what DGBSVXX would itself produce. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed: */ +/* > */ +/* > 1. If FACT = 'E', double precision scaling factors are computed to equilibrate */ +/* > the system: */ +/* > */ +/* > TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B */ +/* > TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */ +/* > TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */ +/* > */ +/* > Whether or not the system will be equilibrated depends on the */ +/* > scaling of the matrix A, but if equilibration is used, A is */ +/* > overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */ +/* > or diag(C)*B (if TRANS = 'T' or 'C'). */ +/* > */ +/* > 2. If FACT = 'N' or 'E', the LU decomposition is used to factor */ +/* > the matrix A (after equilibration if FACT = 'E') as */ +/* > */ +/* > A = P * L * U, */ +/* > */ +/* > where P is a permutation matrix, L is a unit lower triangular */ +/* > matrix, and U is upper triangular. */ +/* > */ +/* > 3. If some U(i,i)=0, so that U is exactly singular, then the */ +/* > routine returns with INFO = i. Otherwise, the factored form of A */ +/* > is used to estimate the condition number of the matrix A (see */ +/* > argument RCOND). If the reciprocal of the condition number is less */ +/* > than machine precision, the routine still goes on to solve for X */ +/* > and compute error bounds as described below. */ +/* > */ +/* > 4. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */ +/* > the routine will use iterative refinement to try to get a small */ +/* > error and error bounds. Refinement calculates the residual to at */ +/* > least twice the working precision. */ +/* > */ +/* > 6. If equilibration was used, the matrix X is premultiplied by */ +/* > diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */ +/* > that it solves the original system before equilibration. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \verbatim */ +/* > Some optional parameters are bundled in the PARAMS array. These */ +/* > settings determine how refinement is performed, but often the */ +/* > defaults are acceptable. If the defaults are acceptable, users */ +/* > can pass NPARAMS = 0 which prevents the source code from accessing */ +/* > the PARAMS argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of the matrix A is */ +/* > supplied on entry, and if not, whether the matrix A should be */ +/* > equilibrated before it is factored. */ +/* > = 'F': On entry, AF and IPIV contain the factored form of A. */ +/* > If EQUED is not 'N', the matrix A has been */ +/* > equilibrated with scaling factors given by R and C. */ +/* > A, AF, and IPIV are not modified. */ +/* > = 'N': The matrix A will be copied to AF and factored. */ +/* > = 'E': The matrix A will be equilibrated if necessary, then */ +/* > copied to AF and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate Transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is DOUBLE PRECISION array, dimension (LDAB,N) */ +/* > On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ +/* > The j-th column of A is stored in the j-th column of the */ +/* > array AB as follows: */ +/* > AB(KU+1+i-j,j) = A(i,j) for f2cmax(1,j-KU)<=i<=f2cmin(N,j+kl) */ +/* > */ +/* > If FACT = 'F' and EQUED is not 'N', then AB must have been */ +/* > equilibrated by the scaling factors in R and/or C. AB is not */ +/* > modified if FACT = 'F' or 'N', or if FACT = 'E' and */ +/* > EQUED = 'N' on exit. */ +/* > */ +/* > On exit, if EQUED .ne. 'N', A is scaled as follows: */ +/* > EQUED = 'R': A := diag(R) * A */ +/* > EQUED = 'C': A := A * diag(C) */ +/* > EQUED = 'B': A := diag(R) * A * diag(C). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AFB */ +/* > \verbatim */ +/* > AFB is DOUBLE PRECISION array, dimension (LDAFB,N) */ +/* > If FACT = 'F', then AFB is an input argument and on entry */ +/* > contains details of the LU factorization of the band matrix */ +/* > A, as computed by DGBTRF. U is stored as an upper triangular */ +/* > band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */ +/* > and the multipliers used during the factorization are stored */ +/* > in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is */ +/* > the factored form of the equilibrated matrix A. */ +/* > */ +/* > If FACT = 'N', then AF is an output argument and on exit */ +/* > returns the factors L and U from the factorization A = P*L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then AF is an output argument and on exit */ +/* > returns the factors L and U from the factorization A = P*L*U */ +/* > of the equilibrated matrix A (see the description of A for */ +/* > the form of the equilibrated matrix). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAFB */ +/* > \verbatim */ +/* > LDAFB is INTEGER */ +/* > The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > as computed by DGETRF; row i of the matrix was interchanged */ +/* > with row IPIV(i). */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > of the equilibrated matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] EQUED */ +/* > \verbatim */ +/* > EQUED is CHARACTER*1 */ +/* > Specifies the form of equilibration that was done. */ +/* > = 'N': No equilibration (always true if FACT = 'N'). */ +/* > = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* > diag(R). */ +/* > = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* > by diag(C). */ +/* > = 'B': Both row and column equilibration, i.e., A has been */ +/* > replaced by diag(R) * A * diag(C). */ +/* > EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* > output argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] R */ +/* > \verbatim */ +/* > R is DOUBLE PRECISION array, dimension (N) */ +/* > The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* > multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* > is not accessed. R is an input argument if FACT = 'F'; */ +/* > otherwise, R is an output argument. If FACT = 'F' and */ +/* > EQUED = 'R' or 'B', each element of R must be positive. */ +/* > If R is output, each element of R is a power of the radix. */ +/* > If R is input, each element of R should be a power of the radix */ +/* > to ensure a reliable solution and error estimates. Scaling by */ +/* > powers of the radix does not cause rounding errors unless the */ +/* > result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (N) */ +/* > The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* > multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* > is not accessed. C is an input argument if FACT = 'F'; */ +/* > otherwise, C is an output argument. If FACT = 'F' and */ +/* > EQUED = 'C' or 'B', each element of C must be positive. */ +/* > If C is output, each element of C is a power of the radix. */ +/* > If C is input, each element of C should be a power of the radix */ +/* > to ensure a reliable solution and error estimates. Scaling by */ +/* > powers of the radix does not cause rounding errors unless the */ +/* > result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, */ +/* > if EQUED = 'N', B is not modified; */ +/* > if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ +/* > diag(R)*B; */ +/* > if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ +/* > overwritten by diag(C)*B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* > If INFO = 0, the N-by-NRHS solution matrix X to the original */ +/* > system of equations. Note that A and B are modified on exit */ +/* > if EQUED .ne. 'N', and the solution to the equilibrated system is */ +/* > inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or */ +/* > inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > Reciprocal scaled condition number. This is an estimate of the */ +/* > reciprocal Skeel condition number of the matrix A after */ +/* > equilibration (if done). If this is less than the machine */ +/* > precision (in particular, if it is zero), the matrix is singular */ +/* > to working precision. Note that the error may still be small even */ +/* > if this number is very small and the matrix appears ill- */ +/* > conditioned. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RPVGRW */ +/* > \verbatim */ +/* > RPVGRW is DOUBLE PRECISION */ +/* > Reciprocal pivot growth. On exit, this contains the reciprocal */ +/* > pivot growth factor norm(A)/norm(U). The "f2cmax absolute element" */ +/* > norm is used. If this is much less than 1, then the stability of */ +/* > the LU factorization of the (equilibrated) matrix A could be poor. */ +/* > This also means that the solution X, estimated condition numbers, */ +/* > and error bounds could be unreliable. If factorization fails with */ +/* > 0 for the leading INFO columns of A. In DGESVX, this quantity is */ +/* > returned in WORK(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > Componentwise relative backward error. This is the */ +/* > componentwise relative backward error of each solution vector X(j) */ +/* > (i.e., the smallest relative change in any element of A or B that */ +/* > makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N_ERR_BNDS */ +/* > \verbatim */ +/* > N_ERR_BNDS is INTEGER */ +/* > Number of error bounds to return for each right hand side */ +/* > and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* > ERR_BNDS_COMP below. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_NORM */ +/* > \verbatim */ +/* > ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > normwise relative error, which is defined as follows: */ +/* > */ +/* > Normwise relative error in the ith solution vector: */ +/* > max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* > ------------------------------ */ +/* > max_j abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the type of error information as described */ +/* > below. There currently are up to three pieces of information */ +/* > returned. */ +/* > */ +/* > The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated normwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*A, where S scales each row by a power of the */ +/* > radix so all absolute row sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_COMP */ +/* > \verbatim */ +/* > ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > componentwise relative error, which is defined as follows: */ +/* > */ +/* > Componentwise relative error in the ith solution vector: */ +/* > abs(XTRUE(j,i) - X(j,i)) */ +/* > max_j ---------------------- */ +/* > abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the right-hand side i (on which the */ +/* > componentwise relative error depends), and the type of error */ +/* > information as described below. There currently are up to three */ +/* > pieces of information returned for each right-hand side. If */ +/* > componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* > ERR_BNDS_COMP is not accessed. If N_ERR_BNDS < 3, then at most */ +/* > the first (:,N_ERR_BNDS) entries are returned. */ +/* > */ +/* > The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated componentwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*(A*diag(x)), where x is the solution for the */ +/* > current right-hand side and S scales each row of */ +/* > A*diag(x) by a power of the radix so all absolute row */ +/* > sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NPARAMS */ +/* > \verbatim */ +/* > NPARAMS is INTEGER */ +/* > Specifies the number of parameters set in PARAMS. If <= 0, the */ +/* > PARAMS array is never referenced and default values are used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] PARAMS */ +/* > \verbatim */ +/* > PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) */ +/* > Specifies algorithm parameters. If an entry is < 0.0, then */ +/* > that entry will be filled with default value used for that */ +/* > parameter. Only positions up to NPARAMS are accessed; defaults */ +/* > are used for higher-numbered parameters. */ +/* > */ +/* > PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* > refinement or not. */ +/* > Default: 1.0D+0 */ +/* > = 0.0: No refinement is performed, and no error bounds are */ +/* > computed. */ +/* > = 1.0: Use the extra-precise refinement algorithm. */ +/* > (other values are reserved for future use) */ +/* > */ +/* > PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* > computations allowed for refinement. */ +/* > Default: 10 */ +/* > Aggressive: Set to 100 to permit convergence using approximate */ +/* > factorizations or factorizations other than LU. If */ +/* > the factorization uses a technique other than */ +/* > Gaussian elimination, the guarantees in */ +/* > err_bnds_norm and err_bnds_comp may no longer be */ +/* > trustworthy. */ +/* > */ +/* > PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* > will attempt to find a solution with small componentwise */ +/* > relative error in the double-precision algorithm. Positive */ +/* > is true, 0.0 is false. */ +/* > Default: 1.0 (attempt componentwise convergence) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (4*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: Successful exit. The solution to every right-hand side is */ +/* > guaranteed. */ +/* > < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly singular, so */ +/* > the solution and error bounds could not be computed. RCOND = 0 */ +/* > is returned. */ +/* > = N+J: The solution corresponding to the Jth right-hand side is */ +/* > not guaranteed. The solutions corresponding to other right- */ +/* > hand sides K with K > J may not be guaranteed as well, but */ +/* > only the first such right-hand side is reported. If a small */ +/* > componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* > the Jth right-hand side is the first with a normwise error */ +/* > bound that is not guaranteed (the smallest J such */ +/* > that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ +/* > the Jth right-hand side is the first with either a normwise or */ +/* > componentwise error bound that is not guaranteed (the smallest */ +/* > J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* > ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* > ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* > about all of the right-hand sides check ERR_BNDS_NORM or */ +/* > ERR_BNDS_COMP. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup doubleGBsolve */ + +/* ===================================================================== */ +/* Subroutine */ int dgbsvxx_(char *fact, char *trans, integer *n, integer * + kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, + doublereal *afb, integer *ldafb, integer *ipiv, char *equed, + doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, + doublereal *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, + doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, + doublereal *err_bnds_comp__, integer *nparams, doublereal *params, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, + x_dim1, x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + doublereal amax; + extern doublereal dla_gbrpvgrw_(integer *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *); + integer i__, j; + extern logical lsame_(char *, char *); + doublereal rcmin, rcmax; + logical equil; + extern doublereal dlamch_(char *); + extern /* Subroutine */ int dlaqgb_(integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, char *); + doublereal colcnd; + extern /* Subroutine */ int dgbtrf_(integer *, integer *, integer *, + integer *, doublereal *, integer *, integer *, integer *); + logical nofact; + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern /* Subroutine */ int dgbtrs_(char *, integer *, integer *, integer + *, integer *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + integer infequ; + logical colequ; + doublereal rowcnd; + logical notran; + doublereal smlnum; + logical rowequ; + extern /* Subroutine */ int dlascl2_(integer *, integer *, doublereal *, + doublereal *, integer *), dgbequb_(integer *, integer *, integer * + , integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *), dgbrfsx_( + char *, char *, integer *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ================================================================== */ + + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1; + err_bnds_norm__ -= err_bnds_norm_offset; + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1 * 1; + afb -= afb_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --berr; + --params; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + equil = lsame_(fact, "E"); + notran = lsame_(trans, "N"); + smlnum = dlamch_("Safe minimum"); + bignum = 1. / smlnum; + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE_; + colequ = FALSE_; + } else { + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + } + +/* Default is failure. If an input parameter is wrong or */ +/* factorization fails, make everything look horrible. Only the */ +/* pivot growth is set here, the rest is initialized in DGBRFSX. */ + + *rpvgrw = 0.; + +/* Test the input parameters. PARAMS is not tested until DGBRFSX. */ + + if (! nofact && ! equil && ! lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kl < 0) { + *info = -4; + } else if (*ku < 0) { + *info = -5; + } else if (*nrhs < 0) { + *info = -6; + } else if (*ldab < *kl + *ku + 1) { + *info = -8; + } else if (*ldafb < (*kl << 1) + *ku + 1) { + *info = -10; + } else if (lsame_(fact, "F") && ! (rowequ || colequ + || lsame_(equed, "N"))) { + *info = -12; + } else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[j]; + rcmax = f2cmax(d__1,d__2); +/* L10: */ + } + if (rcmin <= 0.) { + *info = -13; + } else if (*n > 0) { + rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + rowcnd = 1.; + } + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = f2cmax(d__1,d__2); +/* L20: */ + } + if (rcmin <= 0.) { + *info = -14; + } else if (*n > 0) { + colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + colcnd = 1.; + } + } + if (*info == 0) { + if (*ldb < f2cmax(1,*n)) { + *info = -15; + } else if (*ldx < f2cmax(1,*n)) { + *info = -16; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGBSVXX", &i__1, (ftnlen)7); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + dgbequb_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], & + rowcnd, &colcnd, &amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + dlaqgb_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], & + rowcnd, &colcnd, &amax, equed); + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + } + +/* If the scaling factors are not applied, set them to 1.0. */ + + if (! rowequ) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + r__[j] = 1.; + } + } + if (! colequ) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 1.; + } + } + } + +/* Scale the right hand side. */ + + if (notran) { + if (rowequ) { + dlascl2_(n, nrhs, &r__[1], &b[b_offset], ldb); + } + } else { + if (colequ) { + dlascl2_(n, nrhs, &c__[1], &b[b_offset], ldb); + } + } + + if (nofact || equil) { + +/* Compute the LU factorization of A. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = (*kl << 1) + *ku + 1; + for (i__ = *kl + 1; i__ <= i__2; ++i__) { + afb[i__ + j * afb_dim1] = ab[i__ - *kl + j * ab_dim1]; +/* L30: */ + } +/* L40: */ + } + dgbtrf_(n, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + +/* Pivot in column INFO is exactly 0 */ +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. */ + + *rpvgrw = dla_gbrpvgrw_(n, kl, ku, info, &ab[ab_offset], ldab, & + afb[afb_offset], ldafb); + return 0; + } + } + +/* Compute the reciprocal pivot growth factor RPVGRW. */ + + *rpvgrw = dla_gbrpvgrw_(n, kl, ku, n, &ab[ab_offset], ldab, &afb[ + afb_offset], ldafb); + +/* Compute the solution matrix X. */ + + dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + dgbtrs_(trans, n, kl, ku, nrhs, &afb[afb_offset], ldafb, &ipiv[1], &x[ + x_offset], ldx, info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + dgbrfsx_(trans, equed, n, kl, ku, nrhs, &ab[ab_offset], ldab, &afb[ + afb_offset], ldafb, &ipiv[1], &r__[1], &c__[1], &b[b_offset], ldb, + &x[x_offset], ldx, rcond, &berr[1], n_err_bnds__, & + err_bnds_norm__[err_bnds_norm_offset], &err_bnds_comp__[ + err_bnds_comp_offset], nparams, ¶ms[1], &work[1], &iwork[1], + info); + +/* Scale solutions. */ + + if (colequ && notran) { + dlascl2_(n, nrhs, &c__[1], &x[x_offset], ldx); + } else if (rowequ && ! notran) { + dlascl2_(n, nrhs, &r__[1], &x[x_offset], ldx); + } + + return 0; + +/* End of DGBSVXX */ + +} /* dgbsvxx_ */ + diff --git a/lapack-netlib/SRC/dgbtf2.c b/lapack-netlib/SRC/dgbtf2.c new file mode 100644 index 000000000..37c2fea15 --- /dev/null +++ b/lapack-netlib/SRC/dgbtf2.c @@ -0,0 +1,698 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGBTF2 computes the LU factorization of a general band matrix using the unblocked version of th +e algorithm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGBTF2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) */ + +/* INTEGER INFO, KL, KU, LDAB, M, N */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION AB( LDAB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGBTF2 computes an LU factorization of a real m-by-n band matrix A */ +/* > using partial pivoting with row interchanges. */ +/* > */ +/* > This is the unblocked version of the algorithm, calling Level 2 BLAS. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is DOUBLE PRECISION array, dimension (LDAB,N) */ +/* > On entry, the matrix A in band storage, in rows KL+1 to */ +/* > 2*KL+KU+1; rows 1 to KL of the array need not be set. */ +/* > The j-th column of A is stored in the j-th column of the */ +/* > array AB as follows: */ +/* > AB(kl+ku+1+i-j,j) = A(i,j) for f2cmax(1,j-ku)<=i<=f2cmin(m,j+kl) */ +/* > */ +/* > On exit, details of the factorization: U is stored as an */ +/* > upper triangular band matrix with KL+KU superdiagonals in */ +/* > rows 1 to KL+KU+1, and the multipliers used during the */ +/* > factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */ +/* > See below for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (f2cmin(M,N)) */ +/* > The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGBcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The band storage scheme is illustrated by the following example, when */ +/* > M = N = 6, KL = 2, KU = 1: */ +/* > */ +/* > On entry: On exit: */ +/* > */ +/* > * * * + + + * * * u14 u25 u36 */ +/* > * * + + + + * * u13 u24 u35 u46 */ +/* > * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ +/* > a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ +/* > a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * */ +/* > a31 a42 a53 a64 * * m31 m42 m53 m64 * * */ +/* > */ +/* > Array elements marked * are not used by the routine; elements marked */ +/* > + need not be set on entry, but are required by the routine to store */ +/* > elements of U, because of fill-in resulting from the row */ +/* > interchanges. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgbtf2_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; + doublereal d__1; + + /* Local variables */ + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + integer i__, j; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dswap_(integer *, doublereal *, integer *, doublereal + *, integer *); + integer km, jp, ju, kv; + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* KV is the number of superdiagonals in the factor U, allowing for */ +/* fill-in. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --ipiv; + + /* Function Body */ + kv = *ku + *kl; + +/* Test the input parameters. */ + + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*ldab < *kl + kv + 1) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGBTF2", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Gaussian elimination with partial pivoting */ + +/* Set fill-in elements in columns KU+2 to KV to zero. */ + + i__1 = f2cmin(kv,*n); + for (j = *ku + 2; j <= i__1; ++j) { + i__2 = *kl; + for (i__ = kv - j + 2; i__ <= i__2; ++i__) { + ab[i__ + j * ab_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + +/* JU is the index of the last column affected by the current stage */ +/* of the factorization. */ + + ju = 1; + + i__1 = f2cmin(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Set fill-in elements in column J+KV to zero. */ + + if (j + kv <= *n) { + i__2 = *kl; + for (i__ = 1; i__ <= i__2; ++i__) { + ab[i__ + (j + kv) * ab_dim1] = 0.; +/* L30: */ + } + } + +/* Find pivot and test for singularity. KM is the number of */ +/* subdiagonal elements in the current column. */ + +/* Computing MIN */ + i__2 = *kl, i__3 = *m - j; + km = f2cmin(i__2,i__3); + i__2 = km + 1; + jp = idamax_(&i__2, &ab[kv + 1 + j * ab_dim1], &c__1); + ipiv[j] = jp + j - 1; + if (ab[kv + jp + j * ab_dim1] != 0.) { +/* Computing MAX */ +/* Computing MIN */ + i__4 = j + *ku + jp - 1; + i__2 = ju, i__3 = f2cmin(i__4,*n); + ju = f2cmax(i__2,i__3); + +/* Apply interchange to columns J to JU. */ + + if (jp != 1) { + i__2 = ju - j + 1; + i__3 = *ldab - 1; + i__4 = *ldab - 1; + dswap_(&i__2, &ab[kv + jp + j * ab_dim1], &i__3, &ab[kv + 1 + + j * ab_dim1], &i__4); + } + + if (km > 0) { + +/* Compute multipliers. */ + + d__1 = 1. / ab[kv + 1 + j * ab_dim1]; + dscal_(&km, &d__1, &ab[kv + 2 + j * ab_dim1], &c__1); + +/* Update trailing submatrix within the band. */ + + if (ju > j) { + i__2 = ju - j; + i__3 = *ldab - 1; + i__4 = *ldab - 1; + dger_(&km, &i__2, &c_b9, &ab[kv + 2 + j * ab_dim1], &c__1, + &ab[kv + (j + 1) * ab_dim1], &i__3, &ab[kv + 1 + + (j + 1) * ab_dim1], &i__4); + } + } + } else { + +/* If pivot is zero, set INFO to the index of the pivot */ +/* unless a zero pivot has already been found. */ + + if (*info == 0) { + *info = j; + } + } +/* L40: */ + } + return 0; + +/* End of DGBTF2 */ + +} /* dgbtf2_ */ + diff --git a/lapack-netlib/SRC/dgbtrf.c b/lapack-netlib/SRC/dgbtrf.c new file mode 100644 index 000000000..a9bf6eed6 --- /dev/null +++ b/lapack-netlib/SRC/dgbtrf.c @@ -0,0 +1,1021 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGBTRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGBTRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) */ + +/* INTEGER INFO, KL, KU, LDAB, M, N */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION AB( LDAB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGBTRF computes an LU factorization of a real m-by-n band matrix A */ +/* > using partial pivoting with row interchanges. */ +/* > */ +/* > This is the blocked version of the algorithm, calling Level 3 BLAS. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is DOUBLE PRECISION array, dimension (LDAB,N) */ +/* > On entry, the matrix A in band storage, in rows KL+1 to */ +/* > 2*KL+KU+1; rows 1 to KL of the array need not be set. */ +/* > The j-th column of A is stored in the j-th column of the */ +/* > array AB as follows: */ +/* > AB(kl+ku+1+i-j,j) = A(i,j) for f2cmax(1,j-ku)<=i<=f2cmin(m,j+kl) */ +/* > */ +/* > On exit, details of the factorization: U is stored as an */ +/* > upper triangular band matrix with KL+KU superdiagonals in */ +/* > rows 1 to KL+KU+1, and the multipliers used during the */ +/* > factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */ +/* > See below for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (f2cmin(M,N)) */ +/* > The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGBcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The band storage scheme is illustrated by the following example, when */ +/* > M = N = 6, KL = 2, KU = 1: */ +/* > */ +/* > On entry: On exit: */ +/* > */ +/* > * * * + + + * * * u14 u25 u36 */ +/* > * * + + + + * * u13 u24 u35 u46 */ +/* > * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ +/* > a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ +/* > a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * */ +/* > a31 a42 a53 a64 * * m31 m42 m53 m64 * * */ +/* > */ +/* > Array elements marked * are not used by the routine; elements marked */ +/* > + need not be set on entry, but are required by the routine to store */ +/* > elements of U because of fill-in resulting from the row interchanges. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgbtrf_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1; + + /* Local variables */ + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal temp; + integer i__, j; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemm_(char *, char *, integer *, integer *, integer * + , doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dcopy_( + integer *, doublereal *, integer *, doublereal *, integer *), + dswap_(integer *, doublereal *, integer *, doublereal *, integer * + ); + doublereal work13[4160] /* was [65][64] */, work31[4160] /* + was [65][64] */; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + integer i2, i3, j2, j3, k2; + extern /* Subroutine */ int dgbtf2_(integer *, integer *, integer *, + integer *, doublereal *, integer *, integer *, integer *); + integer jb, nb, ii, jj, jm, ip, jp, km, ju, kv; + extern integer idamax_(integer *, doublereal *, integer *); + integer nw; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* KV is the number of superdiagonals in the factor U, allowing for */ +/* fill-in */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --ipiv; + + /* Function Body */ + kv = *ku + *kl; + +/* Test the input parameters. */ + + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*ldab < *kl + kv + 1) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGBTRF", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment */ + + nb = ilaenv_(&c__1, "DGBTRF", " ", m, n, kl, ku, (ftnlen)6, (ftnlen)1); + +/* The block size must not exceed the limit set by the size of the */ +/* local arrays WORK13 and WORK31. */ + + nb = f2cmin(nb,64); + + if (nb <= 1 || nb > *kl) { + +/* Use unblocked code */ + + dgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info); + } else { + +/* Use blocked code */ + +/* Zero the superdiagonal elements of the work array WORK13 */ + + i__1 = nb; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work13[i__ + j * 65 - 66] = 0.; +/* L10: */ + } +/* L20: */ + } + +/* Zero the subdiagonal elements of the work array WORK31 */ + + i__1 = nb; + for (j = 1; j <= i__1; ++j) { + i__2 = nb; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work31[i__ + j * 65 - 66] = 0.; +/* L30: */ + } +/* L40: */ + } + +/* Gaussian elimination with partial pivoting */ + +/* Set fill-in elements in columns KU+2 to KV to zero */ + + i__1 = f2cmin(kv,*n); + for (j = *ku + 2; j <= i__1; ++j) { + i__2 = *kl; + for (i__ = kv - j + 2; i__ <= i__2; ++i__) { + ab[i__ + j * ab_dim1] = 0.; +/* L50: */ + } +/* L60: */ + } + +/* JU is the index of the last column affected by the current */ +/* stage of the factorization */ + + ju = 1; + + i__1 = f2cmin(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = f2cmin(*m,*n) - j + 1; + jb = f2cmin(i__3,i__4); + +/* The active part of the matrix is partitioned */ + +/* A11 A12 A13 */ +/* A21 A22 A23 */ +/* A31 A32 A33 */ + +/* Here A11, A21 and A31 denote the current block of JB columns */ +/* which is about to be factorized. The number of rows in the */ +/* partitioning are JB, I2, I3 respectively, and the numbers */ +/* of columns are JB, J2, J3. The superdiagonal elements of A13 */ +/* and the subdiagonal elements of A31 lie outside the band. */ + +/* Computing MIN */ + i__3 = *kl - jb, i__4 = *m - j - jb + 1; + i2 = f2cmin(i__3,i__4); +/* Computing MIN */ + i__3 = jb, i__4 = *m - j - *kl + 1; + i3 = f2cmin(i__3,i__4); + +/* J2 and J3 are computed after JU has been updated. */ + +/* Factorize the current block of JB columns */ + + i__3 = j + jb - 1; + for (jj = j; jj <= i__3; ++jj) { + +/* Set fill-in elements in column JJ+KV to zero */ + + if (jj + kv <= *n) { + i__4 = *kl; + for (i__ = 1; i__ <= i__4; ++i__) { + ab[i__ + (jj + kv) * ab_dim1] = 0.; +/* L70: */ + } + } + +/* Find pivot and test for singularity. KM is the number of */ +/* subdiagonal elements in the current column. */ + +/* Computing MIN */ + i__4 = *kl, i__5 = *m - jj; + km = f2cmin(i__4,i__5); + i__4 = km + 1; + jp = idamax_(&i__4, &ab[kv + 1 + jj * ab_dim1], &c__1); + ipiv[jj] = jp + jj - j; + if (ab[kv + jp + jj * ab_dim1] != 0.) { +/* Computing MAX */ +/* Computing MIN */ + i__6 = jj + *ku + jp - 1; + i__4 = ju, i__5 = f2cmin(i__6,*n); + ju = f2cmax(i__4,i__5); + if (jp != 1) { + +/* Apply interchange to columns J to J+JB-1 */ + + if (jp + jj - 1 < j + *kl) { + + i__4 = *ldab - 1; + i__5 = *ldab - 1; + dswap_(&jb, &ab[kv + 1 + jj - j + j * ab_dim1], & + i__4, &ab[kv + jp + jj - j + j * ab_dim1], + &i__5); + } else { + +/* The interchange affects columns J to JJ-1 of A31 */ +/* which are stored in the work array WORK31 */ + + i__4 = jj - j; + i__5 = *ldab - 1; + dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], + &i__5, &work31[jp + jj - j - *kl - 1], & + c__65); + i__4 = j + jb - jj; + i__5 = *ldab - 1; + i__6 = *ldab - 1; + dswap_(&i__4, &ab[kv + 1 + jj * ab_dim1], &i__5, & + ab[kv + jp + jj * ab_dim1], &i__6); + } + } + +/* Compute multipliers */ + + d__1 = 1. / ab[kv + 1 + jj * ab_dim1]; + dscal_(&km, &d__1, &ab[kv + 2 + jj * ab_dim1], &c__1); + +/* Update trailing submatrix within the band and within */ +/* the current block. JM is the index of the last column */ +/* which needs to be updated. */ + +/* Computing MIN */ + i__4 = ju, i__5 = j + jb - 1; + jm = f2cmin(i__4,i__5); + if (jm > jj) { + i__4 = jm - jj; + i__5 = *ldab - 1; + i__6 = *ldab - 1; + dger_(&km, &i__4, &c_b18, &ab[kv + 2 + jj * ab_dim1], + &c__1, &ab[kv + (jj + 1) * ab_dim1], &i__5, & + ab[kv + 1 + (jj + 1) * ab_dim1], &i__6); + } + } else { + +/* If pivot is zero, set INFO to the index of the pivot */ +/* unless a zero pivot has already been found. */ + + if (*info == 0) { + *info = jj; + } + } + +/* Copy current column of A31 into the work array WORK31 */ + +/* Computing MIN */ + i__4 = jj - j + 1; + nw = f2cmin(i__4,i3); + if (nw > 0) { + dcopy_(&nw, &ab[kv + *kl + 1 - jj + j + jj * ab_dim1], & + c__1, &work31[(jj - j + 1) * 65 - 65], &c__1); + } +/* L80: */ + } + if (j + jb <= *n) { + +/* Apply the row interchanges to the other blocks. */ + +/* Computing MIN */ + i__3 = ju - j + 1; + j2 = f2cmin(i__3,kv) - jb; +/* Computing MAX */ + i__3 = 0, i__4 = ju - j - kv + 1; + j3 = f2cmax(i__3,i__4); + +/* Use DLASWP to apply the row interchanges to A12, A22, and */ +/* A32. */ + + i__3 = *ldab - 1; + dlaswp_(&j2, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, & + c__1, &jb, &ipiv[j], &c__1); + +/* Adjust the pivot indices. */ + + i__3 = j + jb - 1; + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = ipiv[i__] + j - 1; +/* L90: */ + } + +/* Apply the row interchanges to A13, A23, and A33 */ +/* columnwise. */ + + k2 = j - 1 + jb + j2; + i__3 = j3; + for (i__ = 1; i__ <= i__3; ++i__) { + jj = k2 + i__; + i__4 = j + jb - 1; + for (ii = j + i__ - 1; ii <= i__4; ++ii) { + ip = ipiv[ii]; + if (ip != ii) { + temp = ab[kv + 1 + ii - jj + jj * ab_dim1]; + ab[kv + 1 + ii - jj + jj * ab_dim1] = ab[kv + 1 + + ip - jj + jj * ab_dim1]; + ab[kv + 1 + ip - jj + jj * ab_dim1] = temp; + } +/* L100: */ + } +/* L110: */ + } + +/* Update the relevant part of the trailing submatrix */ + + if (j2 > 0) { + +/* Update A12 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2, + &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, &ab[kv + + 1 - jb + (j + jb) * ab_dim1], &i__4); + + if (i2 > 0) { + +/* Update A22 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + i__5 = *ldab - 1; + dgemm_("No transpose", "No transpose", &i2, &j2, &jb, + &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, + &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4, + &c_b31, &ab[kv + 1 + (j + jb) * ab_dim1], & + i__5); + } + + if (i3 > 0) { + +/* Update A32 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + dgemm_("No transpose", "No transpose", &i3, &j2, &jb, + &c_b18, work31, &c__65, &ab[kv + 1 - jb + (j + + jb) * ab_dim1], &i__3, &c_b31, &ab[kv + *kl + + 1 - jb + (j + jb) * ab_dim1], &i__4); + } + } + + if (j3 > 0) { + +/* Copy the lower triangle of A13 into the work array */ +/* WORK13 */ + + i__3 = j3; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = jb; + for (ii = jj; ii <= i__4; ++ii) { + work13[ii + jj * 65 - 66] = ab[ii - jj + 1 + (jj + + j + kv - 1) * ab_dim1]; +/* L120: */ + } +/* L130: */ + } + +/* Update A13 in the work array */ + + i__3 = *ldab - 1; + dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3, + &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, work13, + &c__65); + + if (i2 > 0) { + +/* Update A23 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + dgemm_("No transpose", "No transpose", &i2, &j3, &jb, + &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, + work13, &c__65, &c_b31, &ab[jb + 1 + (j + kv) + * ab_dim1], &i__4); + } + + if (i3 > 0) { + +/* Update A33 */ + + i__3 = *ldab - 1; + dgemm_("No transpose", "No transpose", &i3, &j3, &jb, + &c_b18, work31, &c__65, work13, &c__65, & + c_b31, &ab[*kl + 1 + (j + kv) * ab_dim1], & + i__3); + } + +/* Copy the lower triangle of A13 back into place */ + + i__3 = j3; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = jb; + for (ii = jj; ii <= i__4; ++ii) { + ab[ii - jj + 1 + (jj + j + kv - 1) * ab_dim1] = + work13[ii + jj * 65 - 66]; +/* L140: */ + } +/* L150: */ + } + } + } else { + +/* Adjust the pivot indices. */ + + i__3 = j + jb - 1; + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = ipiv[i__] + j - 1; +/* L160: */ + } + } + +/* Partially undo the interchanges in the current block to */ +/* restore the upper triangular form of A31 and copy the upper */ +/* triangle of A31 back into place */ + + i__3 = j; + for (jj = j + jb - 1; jj >= i__3; --jj) { + jp = ipiv[jj] - jj + 1; + if (jp != 1) { + +/* Apply interchange to columns J to JJ-1 */ + + if (jp + jj - 1 < j + *kl) { + +/* The interchange does not affect A31 */ + + i__4 = jj - j; + i__5 = *ldab - 1; + i__6 = *ldab - 1; + dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], & + i__5, &ab[kv + jp + jj - j + j * ab_dim1], & + i__6); + } else { + +/* The interchange does affect A31 */ + + i__4 = jj - j; + i__5 = *ldab - 1; + dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], & + i__5, &work31[jp + jj - j - *kl - 1], &c__65); + } + } + +/* Copy the current column of A31 back into place */ + +/* Computing MIN */ + i__4 = i3, i__5 = jj - j + 1; + nw = f2cmin(i__4,i__5); + if (nw > 0) { + dcopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &ab[ + kv + *kl + 1 - jj + j + jj * ab_dim1], &c__1); + } +/* L170: */ + } +/* L180: */ + } + } + + return 0; + +/* End of DGBTRF */ + +} /* dgbtrf_ */ + diff --git a/lapack-netlib/SRC/dgbtrs.c b/lapack-netlib/SRC/dgbtrs.c new file mode 100644 index 000000000..c2045bef3 --- /dev/null +++ b/lapack-netlib/SRC/dgbtrs.c @@ -0,0 +1,686 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGBTRS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGBTRS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGBTRS solves a system of linear equations */ +/* > A * X = B or A**T * X = B */ +/* > with a general band matrix A using the LU factorization computed */ +/* > by DGBTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations. */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T* X = B (Transpose) */ +/* > = 'C': A**T* X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AB */ +/* > \verbatim */ +/* > AB is DOUBLE PRECISION array, dimension (LDAB,N) */ +/* > Details of the LU factorization of the band matrix A, as */ +/* > computed by DGBTRF. U is stored as an upper triangular band */ +/* > matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */ +/* > the multipliers used during the factorization are stored in */ +/* > rows KL+KU+2 to 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= N, row i of the matrix was */ +/* > interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the right hand side matrix B. */ +/* > On exit, the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGBcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgbtrs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, doublereal *ab, integer *ldab, integer *ipiv, + doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + integer i__, j, l; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dswap_(integer *, + doublereal *, integer *, doublereal *, integer *), dtbsv_(char *, + char *, char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *); + logical lnoti; + integer kd, lm; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical notran; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*nrhs < 0) { + *info = -5; + } else if (*ldab < (*kl << 1) + *ku + 1) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGBTRS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + kd = *ku + *kl + 1; + lnoti = *kl > 0; + + if (notran) { + +/* Solve A*X = B. */ + +/* Solve L*X = B, overwriting B with X. */ + +/* L is represented as a product of permutations and unit lower */ +/* triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), */ +/* where each transformation L(i) is a rank-one modification of */ +/* the identity matrix. */ + + if (lnoti) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__2 = *kl, i__3 = *n - j; + lm = f2cmin(i__2,i__3); + l = ipiv[j]; + if (l != j) { + dswap_(nrhs, &b[l + b_dim1], ldb, &b[j + b_dim1], ldb); + } + dger_(&lm, nrhs, &c_b7, &ab[kd + 1 + j * ab_dim1], &c__1, &b[ + j + b_dim1], ldb, &b[j + 1 + b_dim1], ldb); +/* L10: */ + } + } + + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Solve U*X = B, overwriting B with X. */ + + i__2 = *kl + *ku; + dtbsv_("Upper", "No transpose", "Non-unit", n, &i__2, &ab[ + ab_offset], ldab, &b[i__ * b_dim1 + 1], &c__1); +/* L20: */ + } + + } else { + +/* Solve A**T*X = B. */ + + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Solve U**T*X = B, overwriting B with X. */ + + i__2 = *kl + *ku; + dtbsv_("Upper", "Transpose", "Non-unit", n, &i__2, &ab[ab_offset], + ldab, &b[i__ * b_dim1 + 1], &c__1); +/* L30: */ + } + +/* Solve L**T*X = B, overwriting B with X. */ + + if (lnoti) { + for (j = *n - 1; j >= 1; --j) { +/* Computing MIN */ + i__1 = *kl, i__2 = *n - j; + lm = f2cmin(i__1,i__2); + dgemv_("Transpose", &lm, nrhs, &c_b7, &b[j + 1 + b_dim1], ldb, + &ab[kd + 1 + j * ab_dim1], &c__1, &c_b23, &b[j + + b_dim1], ldb); + l = ipiv[j]; + if (l != j) { + dswap_(nrhs, &b[l + b_dim1], ldb, &b[j + b_dim1], ldb); + } +/* L40: */ + } + } + } + return 0; + +/* End of DGBTRS */ + +} /* dgbtrs_ */ + diff --git a/lapack-netlib/SRC/dgebak.c b/lapack-netlib/SRC/dgebak.c new file mode 100644 index 000000000..054be75e5 --- /dev/null +++ b/lapack-netlib/SRC/dgebak.c @@ -0,0 +1,675 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEBAK */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEBAK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, */ +/* INFO ) */ + +/* CHARACTER JOB, SIDE */ +/* INTEGER IHI, ILO, INFO, LDV, M, N */ +/* DOUBLE PRECISION SCALE( * ), V( LDV, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEBAK forms the right or left eigenvectors of a real general matrix */ +/* > by backward transformation on the computed eigenvectors of the */ +/* > balanced matrix output by DGEBAL. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOB */ +/* > \verbatim */ +/* > JOB is CHARACTER*1 */ +/* > Specifies the type of backward transformation required: */ +/* > = 'N': do nothing, return immediately; */ +/* > = 'P': do backward transformation for permutation only; */ +/* > = 'S': do backward transformation for scaling only; */ +/* > = 'B': do backward transformations for both permutation and */ +/* > scaling. */ +/* > JOB must be the same as the argument JOB supplied to DGEBAL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'R': V contains right eigenvectors; */ +/* > = 'L': V contains left eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of rows of the matrix V. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > The integers ILO and IHI determined by DGEBAL. */ +/* > 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SCALE */ +/* > \verbatim */ +/* > SCALE is DOUBLE PRECISION array, dimension (N) */ +/* > Details of the permutation and scaling factors, as returned */ +/* > by DGEBAL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of columns of the matrix V. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V */ +/* > \verbatim */ +/* > V is DOUBLE PRECISION array, dimension (LDV,M) */ +/* > On entry, the matrix of right or left eigenvectors to be */ +/* > transformed, as returned by DHSEIN or DTREVC. */ +/* > On exit, V is overwritten by the transformed eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. LDV >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *scale, integer *m, doublereal *v, integer * + ldv, integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + integer i__, k; + doublereal s; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical leftv; + integer ii; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical rightv; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Decode and Test the input parameters */ + + /* Parameter adjustments */ + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + + /* Function Body */ + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); + + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) { + *info = -4; + } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < f2cmax(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEBAK", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1. / scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + + } + +/* Backward permutation */ + +/* For I = ILO-1 step -1 until 1, */ +/* IHI+1 step 1 until N do -- */ + +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer) scale[i__]; + if (k == i__) { + goto L40; + } + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + } + + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer) scale[i__]; + if (k == i__) { + goto L50; + } + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L50: + ; + } + } + } + + return 0; + +/* End of DGEBAK */ + +} /* dgebak_ */ + diff --git a/lapack-netlib/SRC/dgebal.c b/lapack-netlib/SRC/dgebal.c new file mode 100644 index 000000000..107f306d8 --- /dev/null +++ b/lapack-netlib/SRC/dgebal.c @@ -0,0 +1,840 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEBAL */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEBAL + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) */ + +/* CHARACTER JOB */ +/* INTEGER IHI, ILO, INFO, LDA, N */ +/* DOUBLE PRECISION A( LDA, * ), SCALE( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEBAL balances a general real matrix A. This involves, first, */ +/* > permuting A by a similarity transformation to isolate eigenvalues */ +/* > in the first 1 to ILO-1 and last IHI+1 to N elements on the */ +/* > diagonal; and second, applying a diagonal similarity transformation */ +/* > to rows and columns ILO to IHI to make the rows and columns as */ +/* > close in norm as possible. Both steps are optional. */ +/* > */ +/* > Balancing may reduce the 1-norm of the matrix, and improve the */ +/* > accuracy of the computed eigenvalues and/or eigenvectors. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOB */ +/* > \verbatim */ +/* > JOB is CHARACTER*1 */ +/* > Specifies the operations to be performed on A: */ +/* > = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 */ +/* > for i = 1,...,N; */ +/* > = 'P': permute only; */ +/* > = 'S': scale only; */ +/* > = 'B': both permute and scale. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the input matrix A. */ +/* > On exit, A is overwritten by the balanced matrix. */ +/* > If JOB = 'N', A is not referenced. */ +/* > See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > \param[out] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > ILO and IHI are set to integers such that on exit */ +/* > A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. */ +/* > If JOB = 'N' or 'S', ILO = 1 and IHI = N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is DOUBLE PRECISION array, dimension (N) */ +/* > Details of the permutations and scaling factors applied to */ +/* > A. If P(j) is the index of the row and column interchanged */ +/* > with row and column j and D(j) is the scaling factor */ +/* > applied to row and column j, then */ +/* > SCALE(j) = P(j) for j = 1,...,ILO-1 */ +/* > = D(j) for j = ILO,...,IHI */ +/* > = P(j) for j = IHI+1,...,N. */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The permutations consist of row and column interchanges which put */ +/* > the matrix in the form */ +/* > */ +/* > ( T1 X Y ) */ +/* > P A P = ( 0 B Z ) */ +/* > ( 0 0 T2 ) */ +/* > */ +/* > where T1 and T2 are upper triangular matrices whose eigenvalues lie */ +/* > along the diagonal. The column indices ILO and IHI mark the starting */ +/* > and ending columns of the submatrix B. Balancing consists of applying */ +/* > a diagonal similarity transformation inv(D) * B * D to make the */ +/* > 1-norms of each row of B and its corresponding column nearly equal. */ +/* > The output matrix is */ +/* > */ +/* > ( T1 X*D Y ) */ +/* > ( 0 inv(D)*B*D inv(D)*Z ). */ +/* > ( 0 0 T2 ) */ +/* > */ +/* > Information about the permutations P and the diagonal matrix D is */ +/* > returned in the vector SCALE. */ +/* > */ +/* > This subroutine is based on the EISPACK routine BALANC. */ +/* > */ +/* > Modified by Tzu-Yi Chen, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgebal_(char *job, integer *n, doublereal *a, integer * + lda, integer *ilo, integer *ihi, doublereal *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + integer iexc; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + doublereal c__, f, g; + integer i__, j, k, l, m; + doublereal r__, s; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal sfmin1, sfmin2, sfmax1, sfmax2, ca, ra; + extern doublereal dlamch_(char *); + extern integer idamax_(integer *, doublereal *, integer *); + extern logical disnan_(doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical noconv; + integer ica, ira; + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --scale; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEBAL", &i__1, (ftnlen)6); + return 0; + } + + k = 1; + l = *n; + + if (*n == 0) { + goto L210; + } + + if (lsame_(job, "N")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.; +/* L10: */ + } + goto L210; + } + + if (lsame_(job, "S")) { + goto L120; + } + +/* Permutation to isolate eigenvalues if possible */ + + goto L50; + +/* Row and column exchange. */ + +L20: + scale[m] = (doublereal) j; + if (j == m) { + goto L30; + } + + dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + +L30: + switch (iexc) { + case 1: goto L40; + case 2: goto L80; + } + +/* Search for rows isolating an eigenvalue and push them down. */ + +L40: + if (l == 1) { + goto L210; + } + --l; + +L50: + for (j = l; j >= 1; --j) { + + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + if (a[j + i__ * a_dim1] != 0.) { + goto L70; + } +L60: + ; + } + + m = l; + iexc = 1; + goto L20; +L70: + ; + } + + goto L90; + +/* Search for columns isolating an eigenvalue and push them left. */ + +L80: + ++k; + +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + if (a[i__ + j * a_dim1] != 0.) { + goto L110; + } +L100: + ; + } + + m = k; + iexc = 2; + goto L20; +L110: + ; + } + +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.; +/* L130: */ + } + + if (lsame_(job, "P")) { + goto L210; + } + +/* Balance the submatrix in rows K to L. */ + +/* Iterative loop for norm reduction */ + + sfmin1 = dlamch_("S") / dlamch_("P"); + sfmax1 = 1. / sfmin1; + sfmin2 = sfmin1 * 2.; + sfmax2 = 1. / sfmin2; + +L140: + noconv = FALSE_; + + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + + i__2 = l - k + 1; + c__ = dnrm2_(&i__2, &a[k + i__ * a_dim1], &c__1); + i__2 = l - k + 1; + r__ = dnrm2_(&i__2, &a[i__ + k * a_dim1], lda); + ica = idamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1)); + i__2 = *n - k + 1; + ira = idamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1)); + +/* Guard against zero C or R due to underflow. */ + + if (c__ == 0. || r__ == 0.) { + goto L200; + } + g = r__ / 2.; + f = 1.; + s = c__ + r__; +L160: +/* Computing MAX */ + d__1 = f2cmax(f,c__); +/* Computing MIN */ + d__2 = f2cmin(r__,g); + if (c__ >= g || f2cmax(d__1,ca) >= sfmax2 || f2cmin(d__2,ra) <= sfmin2) { + goto L170; + } + d__1 = c__ + f + ca + r__ + g + ra; + if (disnan_(&d__1)) { + +/* Exit if NaN to avoid infinite loop */ + + *info = -3; + i__2 = -(*info); + xerbla_("DGEBAL", &i__2, (ftnlen)6); + return 0; + } + f *= 2.; + c__ *= 2.; + ca *= 2.; + r__ /= 2.; + g /= 2.; + ra /= 2.; + goto L160; + +L170: + g = c__ / 2.; +L180: +/* Computing MIN */ + d__1 = f2cmin(f,c__), d__1 = f2cmin(d__1,g); + if (g < r__ || f2cmax(r__,ra) >= sfmax2 || f2cmin(d__1,ca) <= sfmin2) { + goto L190; + } + f /= 2.; + c__ /= 2.; + g /= 2.; + ca /= 2.; + r__ *= 2.; + ra *= 2.; + goto L180; + +/* Now balance. */ + +L190: + if (c__ + r__ >= s * .95) { + goto L200; + } + if (f < 1. && scale[i__] < 1.) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1. && scale[i__] > 1.) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1. / f; + scale[i__] *= f; + noconv = TRUE_; + + i__2 = *n - k + 1; + dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + +L200: + ; + } + + if (noconv) { + goto L140; + } + +L210: + *ilo = k; + *ihi = l; + + return 0; + +/* End of DGEBAL */ + +} /* dgebal_ */ + diff --git a/lapack-netlib/SRC/dgebd2.c b/lapack-netlib/SRC/dgebd2.c new file mode 100644 index 000000000..2cd70809c --- /dev/null +++ b/lapack-netlib/SRC/dgebd2.c @@ -0,0 +1,745 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEBD2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), */ +/* $ TAUQ( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEBD2 reduces a real general m by n matrix A to upper or lower */ +/* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */ +/* > */ +/* > If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows in the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns in the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the m by n general matrix to be reduced. */ +/* > On exit, */ +/* > if m >= n, the diagonal and the first superdiagonal are */ +/* > overwritten with the upper bidiagonal matrix B; the */ +/* > elements below the diagonal, with the array TAUQ, represent */ +/* > the orthogonal matrix Q as a product of elementary */ +/* > reflectors, and the elements above the first superdiagonal, */ +/* > with the array TAUP, represent the orthogonal matrix P as */ +/* > a product of elementary reflectors; */ +/* > if m < n, the diagonal and the first subdiagonal are */ +/* > overwritten with the lower bidiagonal matrix B; the */ +/* > elements below the first subdiagonal, with the array TAUQ, */ +/* > represent the orthogonal matrix Q as a product of */ +/* > elementary reflectors, and the elements above the diagonal, */ +/* > with the array TAUP, represent the orthogonal matrix P as */ +/* > a product of elementary reflectors. */ +/* > See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The diagonal elements of the bidiagonal matrix B: */ +/* > D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (f2cmin(M,N)-1) */ +/* > The off-diagonal elements of the bidiagonal matrix B: */ +/* > if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */ +/* > if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUQ */ +/* > \verbatim */ +/* > TAUQ is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix Q. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUP */ +/* > \verbatim */ +/* > TAUP is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix P. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (f2cmax(M,N)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrices Q and P are represented as products of elementary */ +/* > reflectors: */ +/* > */ +/* > If m >= n, */ +/* > */ +/* > Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) */ +/* > */ +/* > Each H(i) and G(i) has the form: */ +/* > */ +/* > H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T */ +/* > */ +/* > where tauq and taup are real scalars, and v and u are real vectors; */ +/* > v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */ +/* > u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */ +/* > tauq is stored in TAUQ(i) and taup in TAUP(i). */ +/* > */ +/* > If m < n, */ +/* > */ +/* > Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) */ +/* > */ +/* > Each H(i) and G(i) has the form: */ +/* > */ +/* > H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T */ +/* > */ +/* > where tauq and taup are real scalars, and v and u are real vectors; */ +/* > v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */ +/* > u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */ +/* > tauq is stored in TAUQ(i) and taup in TAUP(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples: */ +/* > */ +/* > m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): */ +/* > */ +/* > ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) */ +/* > ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) */ +/* > ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) */ +/* > ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) */ +/* > ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) */ +/* > ( v1 v2 v3 v4 v5 ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of B, vi */ +/* > denotes an element of the vector defining H(i), and ui an element of */ +/* > the vector defining G(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgebd2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("DGEBD2", &i__1, (ftnlen)6); + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + if (i__ < *n) { + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & + tauq[i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1] + ); + } + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *n) { + +/* Generate elementary reflector G(i) to annihilate */ +/* A(i,i+2:n) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + f2cmin( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Apply G(i) to A(i+1:m,i+1:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__; + dlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } else { + taup[i__] = 0.; + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + f2cmin(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + +/* Apply G(i) to A(i+1:m,i:n) from the right */ + + if (i__ < *m) { + i__2 = *m - i__; + i__3 = *n - i__ + 1; + dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, & + taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *m) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(i+2:m,i) */ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[f2cmin(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } else { + tauq[i__] = 0.; + } +/* L20: */ + } + } + return 0; + +/* End of DGEBD2 */ + +} /* dgebd2_ */ + diff --git a/lapack-netlib/SRC/dgebrd.c b/lapack-netlib/SRC/dgebrd.c new file mode 100644 index 000000000..72a91940c --- /dev/null +++ b/lapack-netlib/SRC/dgebrd.c @@ -0,0 +1,784 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEBRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEBRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), */ +/* $ TAUQ( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEBRD reduces a general real M-by-N matrix A to upper or lower */ +/* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */ +/* > */ +/* > If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows in the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns in the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N general matrix to be reduced. */ +/* > On exit, */ +/* > if m >= n, the diagonal and the first superdiagonal are */ +/* > overwritten with the upper bidiagonal matrix B; the */ +/* > elements below the diagonal, with the array TAUQ, represent */ +/* > the orthogonal matrix Q as a product of elementary */ +/* > reflectors, and the elements above the first superdiagonal, */ +/* > with the array TAUP, represent the orthogonal matrix P as */ +/* > a product of elementary reflectors; */ +/* > if m < n, the diagonal and the first subdiagonal are */ +/* > overwritten with the lower bidiagonal matrix B; the */ +/* > elements below the first subdiagonal, with the array TAUQ, */ +/* > represent the orthogonal matrix Q as a product of */ +/* > elementary reflectors, and the elements above the diagonal, */ +/* > with the array TAUP, represent the orthogonal matrix P as */ +/* > a product of elementary reflectors. */ +/* > See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The diagonal elements of the bidiagonal matrix B: */ +/* > D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (f2cmin(M,N)-1) */ +/* > The off-diagonal elements of the bidiagonal matrix B: */ +/* > if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */ +/* > if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUQ */ +/* > \verbatim */ +/* > TAUQ is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix Q. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUP */ +/* > \verbatim */ +/* > TAUP is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix P. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= f2cmax(1,M,N). */ +/* > For optimum performance LWORK >= (M+N)*NB, where NB */ +/* > is the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrices Q and P are represented as products of elementary */ +/* > reflectors: */ +/* > */ +/* > If m >= n, */ +/* > */ +/* > Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) */ +/* > */ +/* > Each H(i) and G(i) has the form: */ +/* > */ +/* > H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T */ +/* > */ +/* > where tauq and taup are real scalars, and v and u are real vectors; */ +/* > v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */ +/* > u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */ +/* > tauq is stored in TAUQ(i) and taup in TAUP(i). */ +/* > */ +/* > If m < n, */ +/* > */ +/* > Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) */ +/* > */ +/* > Each H(i) and G(i) has the form: */ +/* > */ +/* > H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T */ +/* > */ +/* > where tauq and taup are real scalars, and v and u are real vectors; */ +/* > v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */ +/* > u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */ +/* > tauq is stored in TAUQ(i) and taup in TAUP(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples: */ +/* > */ +/* > m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): */ +/* > */ +/* > ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) */ +/* > ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) */ +/* > ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) */ +/* > ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) */ +/* > ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) */ +/* > ( v1 v2 v3 v4 v5 ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of B, vi */ +/* > denotes an element of the vector defining H(i), and ui an element of */ +/* > the vector defining G(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgebrd_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer nbmin, iinfo, minmn; + extern /* Subroutine */ int dgebd2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + integer nb; + extern /* Subroutine */ int dlabrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + integer nx, ws; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwrkx, ldwrky, lwkopt; + logical lquery; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = f2cmax(i__1,i__2); + lwkopt = (*m + *n) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*lwork < f2cmax(i__1,*n) && ! lquery) { + *info = -10; + } + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("DGEBRD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + minmn = f2cmin(*m,*n); + if (minmn == 0) { + work[1] = 1.; + return 0; + } + + ws = f2cmax(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { + +/* Set the crossover point NX. */ + +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__3, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + +/* Determine when to switch from blocked to unblocked code. */ + + if (nx < minmn) { + ws = (*m + *n) * nb; + if (*lwork < ws) { + +/* Not enough work space for the optimal NB, consider using */ +/* a smaller block size. */ + + nbmin = ilaenv_(&c__2, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; + } + } + } + } else { + nx = minmn; + } + + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* Reduce rows and columns i:i+nb-1 to bidiagonal form and return */ +/* the matrices X and Y which are needed to update the unreduced */ +/* part of the matrix */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); + +/* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update */ +/* of the form A := A - V*Y**T - X*U**T */ + + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + dgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b21, &a[i__ + + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & + ldwrky, &c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + dgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b21, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy diagonal and off-diagonal elements of B back into A */ + + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + (j + 1) * a_dim1] = e[j]; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + 1 + j * a_dim1] = e[j]; +/* L20: */ + } + } +/* L30: */ + } + +/* Use unblocked code to reduce the remainder of the matrix */ + + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1] = (doublereal) ws; + return 0; + +/* End of DGEBRD */ + +} /* dgebrd_ */ + diff --git a/lapack-netlib/SRC/dgecon.c b/lapack-netlib/SRC/dgecon.c new file mode 100644 index 000000000..fe47ed355 --- /dev/null +++ b/lapack-netlib/SRC/dgecon.c @@ -0,0 +1,658 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGECON */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGECON + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, */ +/* INFO ) */ + +/* CHARACTER NORM */ +/* INTEGER INFO, LDA, N */ +/* DOUBLE PRECISION ANORM, RCOND */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGECON estimates the reciprocal of the condition number of a general */ +/* > real matrix A, in either the 1-norm or the infinity-norm, using */ +/* > the LU factorization computed by DGETRF. */ +/* > */ +/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* > condition number is computed as */ +/* > RCOND = 1 / ( norm(A) * norm(inv(A)) ). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] NORM */ +/* > \verbatim */ +/* > NORM is CHARACTER*1 */ +/* > Specifies whether the 1-norm condition number or the */ +/* > infinity-norm condition number is required: */ +/* > = '1' or 'O': 1-norm; */ +/* > = 'I': Infinity-norm. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The factors L and U from the factorization A = P*L*U */ +/* > as computed by DGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is DOUBLE PRECISION */ +/* > If NORM = '1' or 'O', the 1-norm of the original matrix A. */ +/* > If NORM = 'I', the infinity-norm of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > The reciprocal of the condition number of the matrix A, */ +/* > computed as RCOND = 1/(norm(A) * norm(inv(A))). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (4*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgecon_(char *norm, integer *n, doublereal *a, integer * + lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer kase, kase1; + doublereal scale; + extern logical lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int drscl_(integer *, doublereal *, doublereal *, + integer *), dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal dlamch_(char *); + doublereal sl; + integer ix; + extern integer idamax_(integer *, doublereal *, integer *); + doublereal su; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal ainvnm; + extern /* Subroutine */ int dlatrs_(char *, char *, char *, char *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *); + logical onenrm; + char normin[1]; + doublereal smlnum; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O"); + if (! onenrm && ! lsame_(norm, "I")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*anorm < 0.) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGECON", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm == 0.) { + return 0; + } + + smlnum = dlamch_("Safe minimum"); + +/* Estimate the norm of inv(A). */ + + ainvnm = 0.; + *(unsigned char *)normin = 'N'; + if (onenrm) { + kase1 = 1; + } else { + kase1 = 2; + } + kase = 0; +L10: + dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == kase1) { + +/* Multiply by inv(L). */ + + dlatrs_("Lower", "No transpose", "Unit", normin, n, &a[a_offset], + lda, &work[1], &sl, &work[(*n << 1) + 1], info); + +/* Multiply by inv(U). */ + + dlatrs_("Upper", "No transpose", "Non-unit", normin, n, &a[ + a_offset], lda, &work[1], &su, &work[*n * 3 + 1], info); + } else { + +/* Multiply by inv(U**T). */ + + dlatrs_("Upper", "Transpose", "Non-unit", normin, n, &a[a_offset], + lda, &work[1], &su, &work[*n * 3 + 1], info); + +/* Multiply by inv(L**T). */ + + dlatrs_("Lower", "Transpose", "Unit", normin, n, &a[a_offset], + lda, &work[1], &sl, &work[(*n << 1) + 1], info); + } + +/* Divide X by 1/(SL*SU) if doing so will not cause overflow. */ + + scale = sl * su; + *(unsigned char *)normin = 'Y'; + if (scale != 1.) { + ix = idamax_(n, &work[1], &c__1); + if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) + { + goto L20; + } + drscl_(n, &scale, &work[1], &c__1); + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + +L20: + return 0; + +/* End of DGECON */ + +} /* dgecon_ */ + diff --git a/lapack-netlib/SRC/dgeequ.c b/lapack-netlib/SRC/dgeequ.c new file mode 100644 index 000000000..504c79619 --- /dev/null +++ b/lapack-netlib/SRC/dgeequ.c @@ -0,0 +1,733 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEEQU */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEEQU + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* DOUBLE PRECISION AMAX, COLCND, ROWCND */ +/* DOUBLE PRECISION A( LDA, * ), C( * ), R( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEEQU computes row and column scalings intended to equilibrate an */ +/* > M-by-N matrix A and reduce its condition number. R returns the row */ +/* > scale factors and C the column scale factors, chosen to try to make */ +/* > the largest element in each row and column of the matrix B with */ +/* > elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. */ +/* > */ +/* > R(i) and C(j) are restricted to be between SMLNUM = smallest safe */ +/* > number and BIGNUM = largest safe number. Use of these scaling */ +/* > factors is not guaranteed to reduce the condition number of A but */ +/* > works well in practice. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The M-by-N matrix whose equilibration factors are */ +/* > to be computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] R */ +/* > \verbatim */ +/* > R is DOUBLE PRECISION array, dimension (M) */ +/* > If INFO = 0 or INFO > M, R contains the row scale factors */ +/* > for A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, C contains the column scale factors for A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ROWCND */ +/* > \verbatim */ +/* > ROWCND is DOUBLE PRECISION */ +/* > If INFO = 0 or INFO > M, ROWCND contains the ratio of the */ +/* > smallest R(i) to the largest R(i). If ROWCND >= 0.1 and */ +/* > AMAX is neither too large nor too small, it is not worth */ +/* > scaling by R. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] COLCND */ +/* > \verbatim */ +/* > COLCND is DOUBLE PRECISION */ +/* > If INFO = 0, COLCND contains the ratio of the smallest */ +/* > C(i) to the largest C(i). If COLCND >= 0.1, it is not */ +/* > worth scaling by C. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] AMAX */ +/* > \verbatim */ +/* > AMAX is DOUBLE PRECISION */ +/* > Absolute value of largest matrix element. If AMAX is very */ +/* > close to overflow or very close to underflow, the matrix */ +/* > should be scaled. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= M: the i-th row of A is exactly zero */ +/* > > M: the (i-M)-th column of A is exactly zero */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgeequ_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal + *colcnd, doublereal *amax, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j; + doublereal rcmin, rcmax; + extern doublereal dlamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum, smlnum; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --r__; + --c__; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEEQU", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + *rowcnd = 1.; + *colcnd = 1.; + *amax = 0.; + return 0; + } + +/* Get machine constants. */ + + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + +/* Compute row scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + r__[i__] = 0.; +/* L10: */ + } + +/* Find the maximum element in each row. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = r__[i__], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + r__[i__] = f2cmax(d__2,d__3); +/* L20: */ + } +/* L30: */ + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[i__]; + rcmax = f2cmax(d__1,d__2); +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[i__]; + rcmin = f2cmin(d__1,d__2); +/* L40: */ + } + *amax = rcmax; + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] == 0.) { + *info = i__; + return 0; + } +/* L50: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = r__[i__]; + d__1 = f2cmax(d__2,smlnum); + r__[i__] = 1. / f2cmin(d__1,bignum); +/* L60: */ + } + +/* Compute ROWCND = f2cmin(R(I)) / f2cmax(R(I)) */ + + *rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } + +/* Compute column scale factors */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 0.; +/* L70: */ + } + +/* Find the maximum element in each column, */ +/* assuming the row scaling computed above. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = c__[j], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)) * + r__[i__]; + c__[j] = f2cmax(d__2,d__3); +/* L80: */ + } +/* L90: */ + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = f2cmax(d__1,d__2); +/* L100: */ + } + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (c__[j] == 0.) { + *info = *m + j; + return 0; + } +/* L110: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = c__[j]; + d__1 = f2cmax(d__2,smlnum); + c__[j] = 1. / f2cmin(d__1,bignum); +/* L120: */ + } + +/* Compute COLCND = f2cmin(C(J)) / f2cmax(C(J)) */ + + *colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } + + return 0; + +/* End of DGEEQU */ + +} /* dgeequ_ */ + diff --git a/lapack-netlib/SRC/dgeequb.c b/lapack-netlib/SRC/dgeequb.c new file mode 100644 index 000000000..49397083a --- /dev/null +++ b/lapack-netlib/SRC/dgeequb.c @@ -0,0 +1,753 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEEQUB */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEEQUB + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEEQUB( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* DOUBLE PRECISION AMAX, COLCND, ROWCND */ +/* DOUBLE PRECISION A( LDA, * ), C( * ), R( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEEQUB computes row and column scalings intended to equilibrate an */ +/* > M-by-N matrix A and reduce its condition number. R returns the row */ +/* > scale factors and C the column scale factors, chosen to try to make */ +/* > the largest element in each row and column of the matrix B with */ +/* > elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most */ +/* > the radix. */ +/* > */ +/* > R(i) and C(j) are restricted to be a power of the radix between */ +/* > SMLNUM = smallest safe number and BIGNUM = largest safe number. Use */ +/* > of these scaling factors is not guaranteed to reduce the condition */ +/* > number of A but works well in practice. */ +/* > */ +/* > This routine differs from DGEEQU by restricting the scaling factors */ +/* > to a power of the radix. Barring over- and underflow, scaling by */ +/* > these factors introduces no additional rounding errors. However, the */ +/* > scaled entries' magnitudes are no longer approximately 1 but lie */ +/* > between sqrt(radix) and 1/sqrt(radix). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The M-by-N matrix whose equilibration factors are */ +/* > to be computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] R */ +/* > \verbatim */ +/* > R is DOUBLE PRECISION array, dimension (M) */ +/* > If INFO = 0 or INFO > M, R contains the row scale factors */ +/* > for A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, C contains the column scale factors for A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ROWCND */ +/* > \verbatim */ +/* > ROWCND is DOUBLE PRECISION */ +/* > If INFO = 0 or INFO > M, ROWCND contains the ratio of the */ +/* > smallest R(i) to the largest R(i). If ROWCND >= 0.1 and */ +/* > AMAX is neither too large nor too small, it is not worth */ +/* > scaling by R. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] COLCND */ +/* > \verbatim */ +/* > COLCND is DOUBLE PRECISION */ +/* > If INFO = 0, COLCND contains the ratio of the smallest */ +/* > C(i) to the largest C(i). If COLCND >= 0.1, it is not */ +/* > worth scaling by C. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] AMAX */ +/* > \verbatim */ +/* > AMAX is DOUBLE PRECISION */ +/* > Absolute value of largest matrix element. If AMAX is very */ +/* > close to overflow or very close to underflow, the matrix */ +/* > should be scaled. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= M: the i-th row of A is exactly zero */ +/* > > M: the (i-M)-th column of A is exactly zero */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgeequb_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal + *colcnd, doublereal *amax, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j; + doublereal radix, rcmin, rcmax; + extern doublereal dlamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum, logrdx, smlnum; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --r__; + --c__; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEEQUB", &i__1, (ftnlen)7); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rowcnd = 1.; + *colcnd = 1.; + *amax = 0.; + return 0; + } + +/* Get machine constants. Assume SMLNUM is a power of the radix. */ + + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + radix = dlamch_("B"); + logrdx = log(radix); + +/* Compute row scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + r__[i__] = 0.; +/* L10: */ + } + +/* Find the maximum element in each row. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = r__[i__], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + r__[i__] = f2cmax(d__2,d__3); +/* L20: */ + } +/* L30: */ + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] > 0.) { + i__2 = (integer) (log(r__[i__]) / logrdx); + r__[i__] = pow_di(&radix, &i__2); + } + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[i__]; + rcmax = f2cmax(d__1,d__2); +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[i__]; + rcmin = f2cmin(d__1,d__2); +/* L40: */ + } + *amax = rcmax; + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] == 0.) { + *info = i__; + return 0; + } +/* L50: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = r__[i__]; + d__1 = f2cmax(d__2,smlnum); + r__[i__] = 1. / f2cmin(d__1,bignum); +/* L60: */ + } + +/* Compute ROWCND = f2cmin(R(I)) / f2cmax(R(I)). */ + + *rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } + +/* Compute column scale factors */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 0.; +/* L70: */ + } + +/* Find the maximum element in each column, */ +/* assuming the row scaling computed above. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = c__[j], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)) * + r__[i__]; + c__[j] = f2cmax(d__2,d__3); +/* L80: */ + } + if (c__[j] > 0.) { + i__2 = (integer) (log(c__[j]) / logrdx); + c__[j] = pow_di(&radix, &i__2); + } +/* L90: */ + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = f2cmax(d__1,d__2); +/* L100: */ + } + + if (rcmin == 0.) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (c__[j] == 0.) { + *info = *m + j; + return 0; + } +/* L110: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ +/* Computing MAX */ + d__2 = c__[j]; + d__1 = f2cmax(d__2,smlnum); + c__[j] = 1. / f2cmin(d__1,bignum); +/* L120: */ + } + +/* Compute COLCND = f2cmin(C(J)) / f2cmax(C(J)). */ + + *colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } + + return 0; + +/* End of DGEEQUB */ + +} /* dgeequb_ */ + diff --git a/lapack-netlib/SRC/dgees.c b/lapack-netlib/SRC/dgees.c new file mode 100644 index 000000000..7fdac4a7d --- /dev/null +++ b/lapack-netlib/SRC/dgees.c @@ -0,0 +1,1002 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGEES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors f +or GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEES + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI, */ +/* VS, LDVS, WORK, LWORK, BWORK, INFO ) */ + +/* CHARACTER JOBVS, SORT */ +/* INTEGER INFO, LDA, LDVS, LWORK, N, SDIM */ +/* LOGICAL BWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), */ +/* $ WR( * ) */ +/* LOGICAL SELECT */ +/* EXTERNAL SELECT */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEES computes for an N-by-N real nonsymmetric matrix A, the */ +/* > eigenvalues, the real Schur form T, and, optionally, the matrix of */ +/* > Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). */ +/* > */ +/* > Optionally, it also orders the eigenvalues on the diagonal of the */ +/* > real Schur form so that selected eigenvalues are at the top left. */ +/* > The leading columns of Z then form an orthonormal basis for the */ +/* > invariant subspace corresponding to the selected eigenvalues. */ +/* > */ +/* > A matrix is in real Schur form if it is upper quasi-triangular with */ +/* > 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the */ +/* > form */ +/* > [ a b ] */ +/* > [ c a ] */ +/* > */ +/* > where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVS */ +/* > \verbatim */ +/* > JOBVS is CHARACTER*1 */ +/* > = 'N': Schur vectors are not computed; */ +/* > = 'V': Schur vectors are computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SORT */ +/* > \verbatim */ +/* > SORT is CHARACTER*1 */ +/* > Specifies whether or not to order the eigenvalues on the */ +/* > diagonal of the Schur form. */ +/* > = 'N': Eigenvalues are not ordered; */ +/* > = 'S': Eigenvalues are ordered (see SELECT). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SELECT */ +/* > \verbatim */ +/* > SELECT is a LOGICAL FUNCTION of two DOUBLE PRECISION arguments */ +/* > SELECT must be declared EXTERNAL in the calling subroutine. */ +/* > If SORT = 'S', SELECT is used to select eigenvalues to sort */ +/* > to the top left of the Schur form. */ +/* > If SORT = 'N', SELECT is not referenced. */ +/* > An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if */ +/* > SELECT(WR(j),WI(j)) is true; i.e., if either one of a complex */ +/* > conjugate pair of eigenvalues is selected, then both complex */ +/* > eigenvalues are selected. */ +/* > Note that a selected complex eigenvalue may no longer */ +/* > satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since */ +/* > ordering may change the value of complex eigenvalues */ +/* > (especially if the eigenvalue is ill-conditioned); in this */ +/* > case INFO is set to N+2 (see INFO below). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. */ +/* > On exit, A has been overwritten by its real Schur form T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SDIM */ +/* > \verbatim */ +/* > SDIM is INTEGER */ +/* > If SORT = 'N', SDIM = 0. */ +/* > If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* > for which SELECT is true. (Complex conjugate */ +/* > pairs for which SELECT is true for either */ +/* > eigenvalue count as 2.) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WR */ +/* > \verbatim */ +/* > WR is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WI */ +/* > \verbatim */ +/* > WI is DOUBLE PRECISION array, dimension (N) */ +/* > WR and WI contain the real and imaginary parts, */ +/* > respectively, of the computed eigenvalues in the same order */ +/* > that they appear on the diagonal of the output Schur form T. */ +/* > Complex conjugate pairs of eigenvalues will appear */ +/* > consecutively with the eigenvalue having the positive */ +/* > imaginary part first. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VS */ +/* > \verbatim */ +/* > VS is DOUBLE PRECISION array, dimension (LDVS,N) */ +/* > If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur */ +/* > vectors. */ +/* > If JOBVS = 'N', VS is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVS */ +/* > \verbatim */ +/* > LDVS is INTEGER */ +/* > The leading dimension of the array VS. LDVS >= 1; if */ +/* > JOBVS = 'V', LDVS >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) contains the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,3*N). */ +/* > For good performance, LWORK must generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > Not referenced if SORT = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, and i is */ +/* > <= N: the QR algorithm failed to compute all the */ +/* > eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI */ +/* > contain those eigenvalues which have converged; if */ +/* > JOBVS = 'V', VS contains the matrix which reduces A */ +/* > to its partially converged Schur form. */ +/* > = N+1: the eigenvalues could not be reordered because some */ +/* > eigenvalues were too close to separate (the problem */ +/* > is very ill-conditioned); */ +/* > = N+2: after reordering, roundoff changed values of some */ +/* > complex eigenvalues so that leading eigenvalues in */ +/* > the Schur form no longer satisfy SELECT=.TRUE. This */ +/* > could also be caused by underflow due to scaling. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int dgees_(char *jobvs, char *sort, L_fp select, integer *n, + doublereal *a, integer *lda, integer *sdim, doublereal *wr, + doublereal *wi, doublereal *vs, integer *ldvs, doublereal *work, + integer *lwork, logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vs_dim1, vs_offset, i__1, i__2, i__3; + + /* Local variables */ + integer ibal; + doublereal anrm; + integer idum[1], ierr, itau, iwrk, inxt, i__; + doublereal s; + integer icond, ieval; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + logical cursl; + integer i1, i2; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_( + char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + dgebal_(char *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *); + logical lst2sl, scalea; + integer ip; + doublereal cscale; + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + doublereal bignum; + extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dhseqr_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), dtrsen_(char *, char *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *, integer *, integer *); + logical lastsl; + integer minwrk, maxwrk; + doublereal smlnum; + integer hswork; + logical wantst, lquery, wantvs; + integer ihi, ilo; + doublereal dum[1], eps, sep; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --wr; + --wi; + vs_dim1 = *ldvs; + vs_offset = 1 + vs_dim1 * 1; + vs -= vs_offset; + --work; + --bwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvs = lsame_(jobvs, "V"); + wantst = lsame_(sort, "S"); + if (! wantvs && ! lsame_(jobvs, "N")) { + *info = -1; + } else if (! wantst && ! lsame_(sort, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldvs < 1 || wantvs && *ldvs < *n) { + *info = -11; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. */ +/* HSWORK refers to the workspace preferred by DHSEQR, as */ +/* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ +/* the worst case.) */ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, + n, &c__0, (ftnlen)6, (ftnlen)1); + minwrk = *n * 3; + + dhseqr_("S", jobvs, n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1] + , &vs[vs_offset], ldvs, &work[1], &c_n1, &ieval); + hswork = (integer) work[1]; + + if (! wantvs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + hswork; + maxwrk = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "DORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + hswork; + maxwrk = f2cmax(i__1,i__2); + } + } + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEES ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("P"); + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0. && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ +/* (Workspace: need N) */ + + ibal = 1; + dgebal_("P", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* Reduce to upper Hessenberg form */ +/* (Workspace: need 3*N, prefer 2*N+N*NB) */ + + itau = *n + ibal; + iwrk = *n + itau; + i__1 = *lwork - iwrk + 1; + dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvs) { + +/* Copy Householder vectors to VS */ + + dlacpy_("L", n, n, &a[a_offset], lda, &vs[vs_offset], ldvs) + ; + +/* Generate orthogonal matrix in VS */ +/* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vs[vs_offset], ldvs, &work[itau], &work[iwrk], + &i__1, &ierr); + } + + *sdim = 0; + +/* Perform QR iteration, accumulating Schur vectors in VS if desired */ +/* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("S", jobvs, n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vs[ + vs_offset], ldvs, &work[iwrk], &i__1, &ieval); + if (ieval > 0) { + *info = ieval; + } + +/* Sort eigenvalues if desired */ + + if (wantst && *info == 0) { + if (scalea) { + dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wr[1], n, & + ierr); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wi[1], n, & + ierr); + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*select)(&wr[i__], &wi[i__]); +/* L10: */ + } + +/* Reorder eigenvalues and transform Schur vectors */ +/* (Workspace: none needed) */ + + i__1 = *lwork - iwrk + 1; + dtrsen_("N", jobvs, &bwork[1], n, &a[a_offset], lda, &vs[vs_offset], + ldvs, &wr[1], &wi[1], sdim, &s, &sep, &work[iwrk], &i__1, + idum, &c__1, &icond); + if (icond > 0) { + *info = *n + icond; + } + } + + if (wantvs) { + +/* Undo balancing */ +/* (Workspace: need N) */ + + dgebak_("P", "R", n, &ilo, &ihi, &work[ibal], n, &vs[vs_offset], ldvs, + &ierr); + } + + if (scalea) { + +/* Undo scaling for the Schur form of A */ + + dlascl_("H", &c__0, &c__0, &cscale, &anrm, n, n, &a[a_offset], lda, & + ierr); + i__1 = *lda + 1; + dcopy_(n, &a[a_offset], &i__1, &wr[1], &c__1); + if (cscale == smlnum) { + +/* If scaling back towards underflow, adjust WI if an */ +/* offdiagonal element of a 2-by-2 block in the Schur form */ +/* underflows. */ + + if (ieval > 0) { + i1 = ieval + 1; + i2 = ihi - 1; + i__1 = ilo - 1; +/* Computing MAX */ + i__3 = ilo - 1; + i__2 = f2cmax(i__3,1); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ + 1], &i__2, &ierr); + } else if (wantst) { + i1 = 1; + i2 = *n - 1; + } else { + i1 = ilo; + i2 = ihi - 1; + } + inxt = i1 - 1; + i__1 = i2; + for (i__ = i1; i__ <= i__1; ++i__) { + if (i__ < inxt) { + goto L20; + } + if (wi[i__] == 0.) { + inxt = i__ + 1; + } else { + if (a[i__ + 1 + i__ * a_dim1] == 0.) { + wi[i__] = 0.; + wi[i__ + 1] = 0.; + } else if (a[i__ + 1 + i__ * a_dim1] != 0. && a[i__ + ( + i__ + 1) * a_dim1] == 0.) { + wi[i__] = 0.; + wi[i__ + 1] = 0.; + if (i__ > 1) { + i__2 = i__ - 1; + dswap_(&i__2, &a[i__ * a_dim1 + 1], &c__1, &a[( + i__ + 1) * a_dim1 + 1], &c__1); + } + if (*n > i__ + 1) { + i__2 = *n - i__ - 1; + dswap_(&i__2, &a[i__ + (i__ + 2) * a_dim1], lda, & + a[i__ + 1 + (i__ + 2) * a_dim1], lda); + } + if (wantvs) { + dswap_(n, &vs[i__ * vs_dim1 + 1], &c__1, &vs[(i__ + + 1) * vs_dim1 + 1], &c__1); + } + a[i__ + (i__ + 1) * a_dim1] = a[i__ + 1 + i__ * + a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 0.; + } + inxt = i__ + 2; + } +L20: + ; + } + } + +/* Undo scaling for the imaginary part of the eigenvalues */ + + i__1 = *n - ieval; +/* Computing MAX */ + i__3 = *n - ieval; + i__2 = f2cmax(i__3,1); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ieval + + 1], &i__2, &ierr); + } + + if (wantst && *info == 0) { + +/* Check if reordering successful */ + + lastsl = TRUE_; + lst2sl = TRUE_; + *sdim = 0; + ip = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*select)(&wr[i__], &wi[i__]); + if (wi[i__] == 0.) { + if (cursl) { + ++(*sdim); + } + ip = 0; + if (cursl && ! lastsl) { + *info = *n + 2; + } + } else { + if (ip == 1) { + +/* Last eigenvalue of conjugate pair */ + + cursl = cursl || lastsl; + lastsl = cursl; + if (cursl) { + *sdim += 2; + } + ip = -1; + if (cursl && ! lst2sl) { + *info = *n + 2; + } + } else { + +/* First eigenvalue of conjugate pair */ + + ip = 1; + } + } + lst2sl = lastsl; + lastsl = cursl; +/* L30: */ + } + } + + work[1] = (doublereal) maxwrk; + return 0; + +/* End of DGEES */ + +} /* dgees_ */ + diff --git a/lapack-netlib/SRC/dgeesx.c b/lapack-netlib/SRC/dgeesx.c new file mode 100644 index 000000000..4bf01895b --- /dev/null +++ b/lapack-netlib/SRC/dgeesx.c @@ -0,0 +1,1124 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGEESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors +for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEESX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, */ +/* WR, WI, VS, LDVS, RCONDE, RCONDV, WORK, LWORK, */ +/* IWORK, LIWORK, BWORK, INFO ) */ + +/* CHARACTER JOBVS, SENSE, SORT */ +/* INTEGER INFO, LDA, LDVS, LIWORK, LWORK, N, SDIM */ +/* DOUBLE PRECISION RCONDE, RCONDV */ +/* LOGICAL BWORK( * ) */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), */ +/* $ WR( * ) */ +/* LOGICAL SELECT */ +/* EXTERNAL SELECT */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEESX computes for an N-by-N real nonsymmetric matrix A, the */ +/* > eigenvalues, the real Schur form T, and, optionally, the matrix of */ +/* > Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). */ +/* > */ +/* > Optionally, it also orders the eigenvalues on the diagonal of the */ +/* > real Schur form so that selected eigenvalues are at the top left; */ +/* > computes a reciprocal condition number for the average of the */ +/* > selected eigenvalues (RCONDE); and computes a reciprocal condition */ +/* > number for the right invariant subspace corresponding to the */ +/* > selected eigenvalues (RCONDV). The leading columns of Z form an */ +/* > orthonormal basis for this invariant subspace. */ +/* > */ +/* > For further explanation of the reciprocal condition numbers RCONDE */ +/* > and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where */ +/* > these quantities are called s and sep respectively). */ +/* > */ +/* > A real matrix is in real Schur form if it is upper quasi-triangular */ +/* > with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in */ +/* > the form */ +/* > [ a b ] */ +/* > [ c a ] */ +/* > */ +/* > where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVS */ +/* > \verbatim */ +/* > JOBVS is CHARACTER*1 */ +/* > = 'N': Schur vectors are not computed; */ +/* > = 'V': Schur vectors are computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SORT */ +/* > \verbatim */ +/* > SORT is CHARACTER*1 */ +/* > Specifies whether or not to order the eigenvalues on the */ +/* > diagonal of the Schur form. */ +/* > = 'N': Eigenvalues are not ordered; */ +/* > = 'S': Eigenvalues are ordered (see SELECT). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SELECT */ +/* > \verbatim */ +/* > SELECT is a LOGICAL FUNCTION of two DOUBLE PRECISION arguments */ +/* > SELECT must be declared EXTERNAL in the calling subroutine. */ +/* > If SORT = 'S', SELECT is used to select eigenvalues to sort */ +/* > to the top left of the Schur form. */ +/* > If SORT = 'N', SELECT is not referenced. */ +/* > An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if */ +/* > SELECT(WR(j),WI(j)) is true; i.e., if either one of a */ +/* > complex conjugate pair of eigenvalues is selected, then both */ +/* > are. Note that a selected complex eigenvalue may no longer */ +/* > satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since */ +/* > ordering may change the value of complex eigenvalues */ +/* > (especially if the eigenvalue is ill-conditioned); in this */ +/* > case INFO may be set to N+3 (see INFO below). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SENSE */ +/* > \verbatim */ +/* > SENSE is CHARACTER*1 */ +/* > Determines which reciprocal condition numbers are computed. */ +/* > = 'N': None are computed; */ +/* > = 'E': Computed for average of selected eigenvalues only; */ +/* > = 'V': Computed for selected right invariant subspace only; */ +/* > = 'B': Computed for both. */ +/* > If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA, N) */ +/* > On entry, the N-by-N matrix A. */ +/* > On exit, A is overwritten by its real Schur form T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SDIM */ +/* > \verbatim */ +/* > SDIM is INTEGER */ +/* > If SORT = 'N', SDIM = 0. */ +/* > If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* > for which SELECT is true. (Complex conjugate */ +/* > pairs for which SELECT is true for either */ +/* > eigenvalue count as 2.) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WR */ +/* > \verbatim */ +/* > WR is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WI */ +/* > \verbatim */ +/* > WI is DOUBLE PRECISION array, dimension (N) */ +/* > WR and WI contain the real and imaginary parts, respectively, */ +/* > of the computed eigenvalues, in the same order that they */ +/* > appear on the diagonal of the output Schur form T. Complex */ +/* > conjugate pairs of eigenvalues appear consecutively with the */ +/* > eigenvalue having the positive imaginary part first. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VS */ +/* > \verbatim */ +/* > VS is DOUBLE PRECISION array, dimension (LDVS,N) */ +/* > If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur */ +/* > vectors. */ +/* > If JOBVS = 'N', VS is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVS */ +/* > \verbatim */ +/* > LDVS is INTEGER */ +/* > The leading dimension of the array VS. LDVS >= 1, and if */ +/* > JOBVS = 'V', LDVS >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDE */ +/* > \verbatim */ +/* > RCONDE is DOUBLE PRECISION */ +/* > If SENSE = 'E' or 'B', RCONDE contains the reciprocal */ +/* > condition number for the average of the selected eigenvalues. */ +/* > Not referenced if SENSE = 'N' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDV */ +/* > \verbatim */ +/* > RCONDV is DOUBLE PRECISION */ +/* > If SENSE = 'V' or 'B', RCONDV contains the reciprocal */ +/* > condition number for the selected right invariant subspace. */ +/* > Not referenced if SENSE = 'N' or 'E'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,3*N). */ +/* > Also, if SENSE = 'E' or 'V' or 'B', */ +/* > LWORK >= N+2*SDIM*(N-SDIM), where SDIM is the number of */ +/* > selected eigenvalues computed by this routine. Note that */ +/* > N+2*SDIM*(N-SDIM) <= N+N*N/2. Note also that an error is only */ +/* > returned if LWORK < f2cmax(1,3*N), but if SENSE = 'E' or 'V' or */ +/* > 'B' this may not be large enough. */ +/* > For good performance, LWORK must generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates upper bounds on the optimal sizes of the */ +/* > arrays WORK and IWORK, returns these values as the first */ +/* > entries of the WORK and IWORK arrays, and no error messages */ +/* > related to LWORK or LIWORK are issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > LIWORK >= 1; if SENSE = 'V' or 'B', LIWORK >= SDIM*(N-SDIM). */ +/* > Note that SDIM*(N-SDIM) <= N*N/4. Note also that an error is */ +/* > only returned if LIWORK < 1, but if SENSE = 'V' or 'B' this */ +/* > may not be large enough. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates upper bounds on the optimal sizes of */ +/* > the arrays WORK and IWORK, returns these values as the first */ +/* > entries of the WORK and IWORK arrays, and no error messages */ +/* > related to LWORK or LIWORK are issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > Not referenced if SORT = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, and i is */ +/* > <= N: the QR algorithm failed to compute all the */ +/* > eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI */ +/* > contain those eigenvalues which have converged; if */ +/* > JOBVS = 'V', VS contains the transformation which */ +/* > reduces A to its partially converged Schur form. */ +/* > = N+1: the eigenvalues could not be reordered because some */ +/* > eigenvalues were too close to separate (the problem */ +/* > is very ill-conditioned); */ +/* > = N+2: after reordering, roundoff changed values of some */ +/* > complex eigenvalues so that leading eigenvalues in */ +/* > the Schur form no longer satisfy SELECT=.TRUE. This */ +/* > could also be caused by underflow due to scaling. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup doubleGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int dgeesx_(char *jobvs, char *sort, L_fp select, char * + sense, integer *n, doublereal *a, integer *lda, integer *sdim, + doublereal *wr, doublereal *wi, doublereal *vs, integer *ldvs, + doublereal *rconde, doublereal *rcondv, doublereal *work, integer * + lwork, integer *iwork, integer *liwork, logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vs_dim1, vs_offset, i__1, i__2, i__3; + + /* Local variables */ + integer ibal; + doublereal anrm; + integer ierr, itau, iwrk, lwrk, inxt, i__, icond, ieval; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + logical cursl; + integer liwrk, i1, i2; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_( + char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + dgebal_(char *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *); + logical lst2sl, scalea; + integer ip; + doublereal cscale; + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + doublereal bignum; + extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dhseqr_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + logical wantsb; + extern /* Subroutine */ int dtrsen_(char *, char *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *, integer *, integer *); + logical wantse, lastsl; + integer minwrk, maxwrk; + logical wantsn; + doublereal smlnum; + integer hswork; + logical wantst, lquery, wantsv, wantvs; + integer ihi, ilo; + doublereal dum[1], eps; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --wr; + --wi; + vs_dim1 = *ldvs; + vs_offset = 1 + vs_dim1 * 1; + vs -= vs_offset; + --work; + --iwork; + --bwork; + + /* Function Body */ + *info = 0; + wantvs = lsame_(jobvs, "V"); + wantst = lsame_(sort, "S"); + wantsn = lsame_(sense, "N"); + wantse = lsame_(sense, "E"); + wantsv = lsame_(sense, "V"); + wantsb = lsame_(sense, "B"); + lquery = *lwork == -1 || *liwork == -1; + + if (! wantvs && ! lsame_(jobvs, "N")) { + *info = -1; + } else if (! wantst && ! lsame_(sort, "N")) { + *info = -2; + } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! + wantsn) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldvs < 1 || wantvs && *ldvs < *n) { + *info = -12; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "RWorkspace:" describe the */ +/* minimal amount of real workspace needed at that point in the */ +/* code, as well as the preferred amount for good performance. */ +/* IWorkspace refers to integer workspace. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. */ +/* HSWORK refers to the workspace preferred by DHSEQR, as */ +/* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ +/* the worst case. */ +/* If SENSE = 'E', 'V' or 'B', then the amount of workspace needed */ +/* depends on SDIM, which is computed by the routine DTRSEN later */ +/* in the code.) */ + + if (*info == 0) { + liwrk = 1; + if (*n == 0) { + minwrk = 1; + lwrk = 1; + } else { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, + n, &c__0, (ftnlen)6, (ftnlen)1); + minwrk = *n * 3; + + dhseqr_("S", jobvs, n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1] + , &vs[vs_offset], ldvs, &work[1], &c_n1, &ieval); + hswork = (integer) work[1]; + + if (! wantvs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + hswork; + maxwrk = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "DORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + hswork; + maxwrk = f2cmax(i__1,i__2); + } + lwrk = maxwrk; + if (! wantsn) { +/* Computing MAX */ + i__1 = lwrk, i__2 = *n + *n * *n / 2; + lwrk = f2cmax(i__1,i__2); + } + if (wantsv || wantsb) { + liwrk = *n * *n / 4; + } + } + iwork[1] = liwrk; + work[1] = (doublereal) lwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -16; + } else if (*liwork < 1 && ! lquery) { + *info = -18; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEESX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("P"); + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0. && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ +/* (RWorkspace: need N) */ + + ibal = 1; + dgebal_("P", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* Reduce to upper Hessenberg form */ +/* (RWorkspace: need 3*N, prefer 2*N+N*NB) */ + + itau = *n + ibal; + iwrk = *n + itau; + i__1 = *lwork - iwrk + 1; + dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvs) { + +/* Copy Householder vectors to VS */ + + dlacpy_("L", n, n, &a[a_offset], lda, &vs[vs_offset], ldvs) + ; + +/* Generate orthogonal matrix in VS */ +/* (RWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vs[vs_offset], ldvs, &work[itau], &work[iwrk], + &i__1, &ierr); + } + + *sdim = 0; + +/* Perform QR iteration, accumulating Schur vectors in VS if desired */ +/* (RWorkspace: need N+1, prefer N+HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("S", jobvs, n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vs[ + vs_offset], ldvs, &work[iwrk], &i__1, &ieval); + if (ieval > 0) { + *info = ieval; + } + +/* Sort eigenvalues if desired */ + + if (wantst && *info == 0) { + if (scalea) { + dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wr[1], n, & + ierr); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wi[1], n, & + ierr); + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*select)(&wr[i__], &wi[i__]); +/* L10: */ + } + +/* Reorder eigenvalues, transform Schur vectors, and compute */ +/* reciprocal condition numbers */ +/* (RWorkspace: if SENSE is not 'N', need N+2*SDIM*(N-SDIM) */ +/* otherwise, need N ) */ +/* (IWorkspace: if SENSE is 'V' or 'B', need SDIM*(N-SDIM) */ +/* otherwise, need 0 ) */ + + i__1 = *lwork - iwrk + 1; + dtrsen_(sense, jobvs, &bwork[1], n, &a[a_offset], lda, &vs[vs_offset], + ldvs, &wr[1], &wi[1], sdim, rconde, rcondv, &work[iwrk], & + i__1, &iwork[1], liwork, &icond); + if (! wantsn) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + (*sdim << 1) * (*n - *sdim); + maxwrk = f2cmax(i__1,i__2); + } + if (icond == -15) { + +/* Not enough real workspace */ + + *info = -16; + } else if (icond == -17) { + +/* Not enough integer workspace */ + + *info = -18; + } else if (icond > 0) { + +/* DTRSEN failed to reorder or to restore standard Schur form */ + + *info = icond + *n; + } + } + + if (wantvs) { + +/* Undo balancing */ +/* (RWorkspace: need N) */ + + dgebak_("P", "R", n, &ilo, &ihi, &work[ibal], n, &vs[vs_offset], ldvs, + &ierr); + } + + if (scalea) { + +/* Undo scaling for the Schur form of A */ + + dlascl_("H", &c__0, &c__0, &cscale, &anrm, n, n, &a[a_offset], lda, & + ierr); + i__1 = *lda + 1; + dcopy_(n, &a[a_offset], &i__1, &wr[1], &c__1); + if ((wantsv || wantsb) && *info == 0) { + dum[0] = *rcondv; + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, & + c__1, &ierr); + *rcondv = dum[0]; + } + if (cscale == smlnum) { + +/* If scaling back towards underflow, adjust WI if an */ +/* offdiagonal element of a 2-by-2 block in the Schur form */ +/* underflows. */ + + if (ieval > 0) { + i1 = ieval + 1; + i2 = ihi - 1; + i__1 = ilo - 1; + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ + 1], n, &ierr); + } else if (wantst) { + i1 = 1; + i2 = *n - 1; + } else { + i1 = ilo; + i2 = ihi - 1; + } + inxt = i1 - 1; + i__1 = i2; + for (i__ = i1; i__ <= i__1; ++i__) { + if (i__ < inxt) { + goto L20; + } + if (wi[i__] == 0.) { + inxt = i__ + 1; + } else { + if (a[i__ + 1 + i__ * a_dim1] == 0.) { + wi[i__] = 0.; + wi[i__ + 1] = 0.; + } else if (a[i__ + 1 + i__ * a_dim1] != 0. && a[i__ + ( + i__ + 1) * a_dim1] == 0.) { + wi[i__] = 0.; + wi[i__ + 1] = 0.; + if (i__ > 1) { + i__2 = i__ - 1; + dswap_(&i__2, &a[i__ * a_dim1 + 1], &c__1, &a[( + i__ + 1) * a_dim1 + 1], &c__1); + } + if (*n > i__ + 1) { + i__2 = *n - i__ - 1; + dswap_(&i__2, &a[i__ + (i__ + 2) * a_dim1], lda, & + a[i__ + 1 + (i__ + 2) * a_dim1], lda); + } + if (wantvs) { + dswap_(n, &vs[i__ * vs_dim1 + 1], &c__1, &vs[(i__ + + 1) * vs_dim1 + 1], &c__1); + } + a[i__ + (i__ + 1) * a_dim1] = a[i__ + 1 + i__ * + a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 0.; + } + inxt = i__ + 2; + } +L20: + ; + } + } + i__1 = *n - ieval; +/* Computing MAX */ + i__3 = *n - ieval; + i__2 = f2cmax(i__3,1); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ieval + + 1], &i__2, &ierr); + } + + if (wantst && *info == 0) { + +/* Check if reordering successful */ + + lastsl = TRUE_; + lst2sl = TRUE_; + *sdim = 0; + ip = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*select)(&wr[i__], &wi[i__]); + if (wi[i__] == 0.) { + if (cursl) { + ++(*sdim); + } + ip = 0; + if (cursl && ! lastsl) { + *info = *n + 2; + } + } else { + if (ip == 1) { + +/* Last eigenvalue of conjugate pair */ + + cursl = cursl || lastsl; + lastsl = cursl; + if (cursl) { + *sdim += 2; + } + ip = -1; + if (cursl && ! lst2sl) { + *info = *n + 2; + } + } else { + +/* First eigenvalue of conjugate pair */ + + ip = 1; + } + } + lst2sl = lastsl; + lastsl = cursl; +/* L30: */ + } + } + + work[1] = (doublereal) maxwrk; + if (wantsv || wantsb) { +/* Computing MAX */ + i__1 = 1, i__2 = *sdim * (*n - *sdim); + iwork[1] = f2cmax(i__1,i__2); + } else { + iwork[1] = 1; + } + + return 0; + +/* End of DGEESX */ + +} /* dgeesx_ */ + diff --git a/lapack-netlib/SRC/dgeev.c b/lapack-netlib/SRC/dgeev.c new file mode 100644 index 000000000..6ded77f0c --- /dev/null +++ b/lapack-netlib/SRC/dgeev.c @@ -0,0 +1,1039 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matr +ices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEEV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, */ +/* LDVR, WORK, LWORK, INFO ) */ + +/* CHARACTER JOBVL, JOBVR */ +/* INTEGER INFO, LDA, LDVL, LDVR, LWORK, N */ +/* DOUBLE PRECISION A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), */ +/* $ WI( * ), WORK( * ), WR( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEEV computes for an N-by-N real nonsymmetric matrix A, the */ +/* > eigenvalues and, optionally, the left and/or right eigenvectors. */ +/* > */ +/* > The right eigenvector v(j) of A satisfies */ +/* > A * v(j) = lambda(j) * v(j) */ +/* > where lambda(j) is its eigenvalue. */ +/* > The left eigenvector u(j) of A satisfies */ +/* > u(j)**H * A = lambda(j) * u(j)**H */ +/* > where u(j)**H denotes the conjugate-transpose of u(j). */ +/* > */ +/* > The computed eigenvectors are normalized to have Euclidean norm */ +/* > equal to 1 and largest component real. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVL */ +/* > \verbatim */ +/* > JOBVL is CHARACTER*1 */ +/* > = 'N': left eigenvectors of A are not computed; */ +/* > = 'V': left eigenvectors of A are computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVR */ +/* > \verbatim */ +/* > JOBVR is CHARACTER*1 */ +/* > = 'N': right eigenvectors of A are not computed; */ +/* > = 'V': right eigenvectors of A are computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. */ +/* > On exit, A has been overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WR */ +/* > \verbatim */ +/* > WR is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WI */ +/* > \verbatim */ +/* > WI is DOUBLE PRECISION array, dimension (N) */ +/* > WR and WI contain the real and imaginary parts, */ +/* > respectively, of the computed eigenvalues. Complex */ +/* > conjugate pairs of eigenvalues appear consecutively */ +/* > with the eigenvalue having the positive imaginary part */ +/* > first. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION array, dimension (LDVL,N) */ +/* > If JOBVL = 'V', the left eigenvectors u(j) are stored one */ +/* > after another in the columns of VL, in the same order */ +/* > as their eigenvalues. */ +/* > If JOBVL = 'N', VL is not referenced. */ +/* > If the j-th eigenvalue is real, then u(j) = VL(:,j), */ +/* > the j-th column of VL. */ +/* > If the j-th and (j+1)-st eigenvalues form a complex */ +/* > conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and */ +/* > u(j+1) = VL(:,j) - i*VL(:,j+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVL */ +/* > \verbatim */ +/* > LDVL is INTEGER */ +/* > The leading dimension of the array VL. LDVL >= 1; if */ +/* > JOBVL = 'V', LDVL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VR */ +/* > \verbatim */ +/* > VR is DOUBLE PRECISION array, dimension (LDVR,N) */ +/* > If JOBVR = 'V', the right eigenvectors v(j) are stored one */ +/* > after another in the columns of VR, in the same order */ +/* > as their eigenvalues. */ +/* > If JOBVR = 'N', VR is not referenced. */ +/* > If the j-th eigenvalue is real, then v(j) = VR(:,j), */ +/* > the j-th column of VR. */ +/* > If the j-th and (j+1)-st eigenvalues form a complex */ +/* > conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and */ +/* > v(j+1) = VR(:,j) - i*VR(:,j+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVR */ +/* > \verbatim */ +/* > LDVR is INTEGER */ +/* > The leading dimension of the array VR. LDVR >= 1; if */ +/* > JOBVR = 'V', LDVR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,3*N), and */ +/* > if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good */ +/* > performance, LWORK must generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, the QR algorithm failed to compute all the */ +/* > eigenvalues, and no eigenvectors have been computed; */ +/* > elements i+1:N of WR and WI contain eigenvalues which */ +/* > have converged. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* @precisions fortran d -> s */ + +/* > \ingroup doubleGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal * + a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, + integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + doublereal d__1, d__2; + + /* Local variables */ + integer ibal; + char side[1]; + doublereal anrm; + integer ierr, itau; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer iwrk, nout; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + integer i__, k; + doublereal r__; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern doublereal dlapy2_(doublereal *, doublereal *); + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_( + char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + dgebal_(char *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *); + doublereal cs; + logical scalea; + extern doublereal dlamch_(char *); + doublereal cscale; + extern doublereal dlange_(char *, integer *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + doublereal sn; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), xerbla_(char *, integer *, ftnlen); + logical select[1]; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + doublereal bignum; + extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dhseqr_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + integer minwrk, maxwrk; + logical wantvl; + doublereal smlnum; + integer hswork; + logical lquery, wantvr; + extern /* Subroutine */ int dtrevc3_(char *, char *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *, integer + *); + integer ihi; + doublereal scl; + integer ilo; + doublereal dum[1], eps; + integer lwork_trevc__; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1 * 1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1 * 1; + vr -= vr_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + if (! wantvl && ! lsame_(jobvl, "N")) { + *info = -1; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -9; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -11; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. */ +/* HSWORK refers to the workspace preferred by DHSEQR, as */ +/* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ +/* the worst case.) */ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, + n, &c__0, (ftnlen)6, (ftnlen)1); + if (wantvl) { + minwrk = *n << 2; +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "DORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = f2cmax(i__1,i__2); + dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vl[vl_offset], ldvl, &work[1], &c_n1, info); + hswork = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = * + n + hswork; + maxwrk = f2cmax(i__1,i__2); + dtrevc3_("L", "B", select, n, &a[a_offset], lda, &vl[ + vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, & + work[1], &c_n1, &ierr); + lwork_trevc__ = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + lwork_trevc__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = f2cmax(i__1,i__2); + } else if (wantvr) { + minwrk = *n << 2; +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "DORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = f2cmax(i__1,i__2); + dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + hswork = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = * + n + hswork; + maxwrk = f2cmax(i__1,i__2); + dtrevc3_("R", "B", select, n, &a[a_offset], lda, &vl[ + vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, & + work[1], &c_n1, &ierr); + lwork_trevc__ = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + lwork_trevc__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = f2cmax(i__1,i__2); + } else { + minwrk = *n * 3; + dhseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + hswork = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = * + n + hswork; + maxwrk = f2cmax(i__1,i__2); + } + maxwrk = f2cmax(maxwrk,minwrk); + } + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEEV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("P"); + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0. && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* Balance the matrix */ +/* (Workspace: need N) */ + + ibal = 1; + dgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* Reduce to upper Hessenberg form */ +/* (Workspace: need 3*N, prefer 2*N+N*NB) */ + + itau = ibal + *n; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvl) { + +/* Want left eigenvectors */ +/* Copy Householder vectors to VL */ + + *(unsigned char *)side = 'L'; + dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* Generate orthogonal matrix in VL */ +/* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* Perform QR iteration, accumulating Schur vectors in VL */ +/* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vl[vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* Want left and right eigenvectors */ +/* Copy Schur vectors to VR */ + + *(unsigned char *)side = 'B'; + dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* Want right eigenvectors */ +/* Copy Householder vectors to VR */ + + *(unsigned char *)side = 'R'; + dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* Generate orthogonal matrix in VR */ +/* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* Perform QR iteration, accumulating Schur vectors in VR */ +/* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* Compute eigenvalues only */ +/* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO .NE. 0 from DHSEQR, then quit */ + + if (*info != 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* Compute left and/or right eigenvectors */ +/* (Workspace: need 4*N, prefer N + N + 2*N*NB) */ + + i__1 = *lwork - iwrk + 1; + dtrevc3_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], + ldvl, &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &i__1, & + ierr); + } + + if (wantvl) { + +/* Undo balancing of left eigenvectors */ +/* (Workspace: need N) */ + + dgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, + &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1. / dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + d__1 = vl[k + i__ * vl_dim1]; +/* Computing 2nd power */ + d__2 = vl[k + (i__ + 1) * vl_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; +/* L10: */ + } + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], + &cs, &sn, &r__); + drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * + vl_dim1 + 1], &c__1, &cs, &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.; + } +/* L20: */ + } + } + + if (wantvr) { + +/* Undo balancing of right eigenvectors */ +/* (Workspace: need N) */ + + dgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, + &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1. / dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + d__1 = vr[k + i__ * vr_dim1]; +/* Computing 2nd power */ + d__2 = vr[k + (i__ + 1) * vr_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; +/* L30: */ + } + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], + &cs, &sn, &r__); + drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * + vr_dim1 + 1], &c__1, &cs, &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.; + } +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = f2cmax(i__3,1); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + + 1], &i__2, &ierr); + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = f2cmax(i__3,1); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + + 1], &i__2, &ierr); + if (*info > 0) { + i__1 = ilo - 1; + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], + n, &ierr); + i__1 = ilo - 1; + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], + n, &ierr); + } + } + + work[1] = (doublereal) maxwrk; + return 0; + +/* End of DGEEV */ + +} /* dgeev_ */ + diff --git a/lapack-netlib/SRC/dgeevx.c b/lapack-netlib/SRC/dgeevx.c new file mode 100644 index 000000000..1de111683 --- /dev/null +++ b/lapack-netlib/SRC/dgeevx.c @@ -0,0 +1,1206 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE mat +rices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEEVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, */ +/* VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, */ +/* RCONDE, RCONDV, WORK, LWORK, IWORK, INFO ) */ + +/* CHARACTER BALANC, JOBVL, JOBVR, SENSE */ +/* INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N */ +/* DOUBLE PRECISION ABNRM */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), RCONDE( * ), RCONDV( * ), */ +/* $ SCALE( * ), VL( LDVL, * ), VR( LDVR, * ), */ +/* $ WI( * ), WORK( * ), WR( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEEVX computes for an N-by-N real nonsymmetric matrix A, the */ +/* > eigenvalues and, optionally, the left and/or right eigenvectors. */ +/* > */ +/* > Optionally also, it computes a balancing transformation to improve */ +/* > the conditioning of the eigenvalues and eigenvectors (ILO, IHI, */ +/* > SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues */ +/* > (RCONDE), and reciprocal condition numbers for the right */ +/* > eigenvectors (RCONDV). */ +/* > */ +/* > The right eigenvector v(j) of A satisfies */ +/* > A * v(j) = lambda(j) * v(j) */ +/* > where lambda(j) is its eigenvalue. */ +/* > The left eigenvector u(j) of A satisfies */ +/* > u(j)**H * A = lambda(j) * u(j)**H */ +/* > where u(j)**H denotes the conjugate-transpose of u(j). */ +/* > */ +/* > The computed eigenvectors are normalized to have Euclidean norm */ +/* > equal to 1 and largest component real. */ +/* > */ +/* > Balancing a matrix means permuting the rows and columns to make it */ +/* > more nearly upper triangular, and applying a diagonal similarity */ +/* > transformation D * A * D**(-1), where D is a diagonal matrix, to */ +/* > make its rows and columns closer in norm and the condition numbers */ +/* > of its eigenvalues and eigenvectors smaller. The computed */ +/* > reciprocal condition numbers correspond to the balanced matrix. */ +/* > Permuting rows and columns will not change the condition numbers */ +/* > (in exact arithmetic) but diagonal scaling will. For further */ +/* > explanation of balancing, see section 4.10.2 of the LAPACK */ +/* > Users' Guide. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] BALANC */ +/* > \verbatim */ +/* > BALANC is CHARACTER*1 */ +/* > Indicates how the input matrix should be diagonally scaled */ +/* > and/or permuted to improve the conditioning of its */ +/* > eigenvalues. */ +/* > = 'N': Do not diagonally scale or permute; */ +/* > = 'P': Perform permutations to make the matrix more nearly */ +/* > upper triangular. Do not diagonally scale; */ +/* > = 'S': Diagonally scale the matrix, i.e. replace A by */ +/* > D*A*D**(-1), where D is a diagonal matrix chosen */ +/* > to make the rows and columns of A more equal in */ +/* > norm. Do not permute; */ +/* > = 'B': Both diagonally scale and permute A. */ +/* > */ +/* > Computed reciprocal condition numbers will be for the matrix */ +/* > after balancing and/or permuting. Permuting does not change */ +/* > condition numbers (in exact arithmetic), but balancing does. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVL */ +/* > \verbatim */ +/* > JOBVL is CHARACTER*1 */ +/* > = 'N': left eigenvectors of A are not computed; */ +/* > = 'V': left eigenvectors of A are computed. */ +/* > If SENSE = 'E' or 'B', JOBVL must = 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVR */ +/* > \verbatim */ +/* > JOBVR is CHARACTER*1 */ +/* > = 'N': right eigenvectors of A are not computed; */ +/* > = 'V': right eigenvectors of A are computed. */ +/* > If SENSE = 'E' or 'B', JOBVR must = 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SENSE */ +/* > \verbatim */ +/* > SENSE is CHARACTER*1 */ +/* > Determines which reciprocal condition numbers are computed. */ +/* > = 'N': None are computed; */ +/* > = 'E': Computed for eigenvalues only; */ +/* > = 'V': Computed for right eigenvectors only; */ +/* > = 'B': Computed for eigenvalues and right eigenvectors. */ +/* > */ +/* > If SENSE = 'E' or 'B', both left and right eigenvectors */ +/* > must also be computed (JOBVL = 'V' and JOBVR = 'V'). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. */ +/* > On exit, A has been overwritten. If JOBVL = 'V' or */ +/* > JOBVR = 'V', A contains the real Schur form of the balanced */ +/* > version of the input matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WR */ +/* > \verbatim */ +/* > WR is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WI */ +/* > \verbatim */ +/* > WI is DOUBLE PRECISION array, dimension (N) */ +/* > WR and WI contain the real and imaginary parts, */ +/* > respectively, of the computed eigenvalues. Complex */ +/* > conjugate pairs of eigenvalues will appear consecutively */ +/* > with the eigenvalue having the positive imaginary part */ +/* > first. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION array, dimension (LDVL,N) */ +/* > If JOBVL = 'V', the left eigenvectors u(j) are stored one */ +/* > after another in the columns of VL, in the same order */ +/* > as their eigenvalues. */ +/* > If JOBVL = 'N', VL is not referenced. */ +/* > If the j-th eigenvalue is real, then u(j) = VL(:,j), */ +/* > the j-th column of VL. */ +/* > If the j-th and (j+1)-st eigenvalues form a complex */ +/* > conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and */ +/* > u(j+1) = VL(:,j) - i*VL(:,j+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVL */ +/* > \verbatim */ +/* > LDVL is INTEGER */ +/* > The leading dimension of the array VL. LDVL >= 1; if */ +/* > JOBVL = 'V', LDVL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VR */ +/* > \verbatim */ +/* > VR is DOUBLE PRECISION array, dimension (LDVR,N) */ +/* > If JOBVR = 'V', the right eigenvectors v(j) are stored one */ +/* > after another in the columns of VR, in the same order */ +/* > as their eigenvalues. */ +/* > If JOBVR = 'N', VR is not referenced. */ +/* > If the j-th eigenvalue is real, then v(j) = VR(:,j), */ +/* > the j-th column of VR. */ +/* > If the j-th and (j+1)-st eigenvalues form a complex */ +/* > conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and */ +/* > v(j+1) = VR(:,j) - i*VR(:,j+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVR */ +/* > \verbatim */ +/* > LDVR is INTEGER */ +/* > The leading dimension of the array VR. LDVR >= 1, and if */ +/* > JOBVR = 'V', LDVR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > ILO and IHI are integer values determined when A was */ +/* > balanced. The balanced A(i,j) = 0 if I > J and */ +/* > J = 1,...,ILO-1 or I = IHI+1,...,N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is DOUBLE PRECISION array, dimension (N) */ +/* > Details of the permutations and scaling factors applied */ +/* > when balancing A. If P(j) is the index of the row and column */ +/* > interchanged with row and column j, and D(j) is the scaling */ +/* > factor applied to row and column j, then */ +/* > SCALE(J) = P(J), for J = 1,...,ILO-1 */ +/* > = D(J), for J = ILO,...,IHI */ +/* > = P(J) for J = IHI+1,...,N. */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ABNRM */ +/* > \verbatim */ +/* > ABNRM is DOUBLE PRECISION */ +/* > The one-norm of the balanced matrix (the maximum */ +/* > of the sum of absolute values of elements of any column). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDE */ +/* > \verbatim */ +/* > RCONDE is DOUBLE PRECISION array, dimension (N) */ +/* > RCONDE(j) is the reciprocal condition number of the j-th */ +/* > eigenvalue. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDV */ +/* > \verbatim */ +/* > RCONDV is DOUBLE PRECISION array, dimension (N) */ +/* > RCONDV(j) is the reciprocal condition number of the j-th */ +/* > right eigenvector. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. If SENSE = 'N' or 'E', */ +/* > LWORK >= f2cmax(1,2*N), and if JOBVL = 'V' or JOBVR = 'V', */ +/* > LWORK >= 3*N. If SENSE = 'V' or 'B', LWORK >= N*(N+6). */ +/* > For good performance, LWORK must generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (2*N-2) */ +/* > If SENSE = 'N' or 'E', not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, the QR algorithm failed to compute all the */ +/* > eigenvalues, and no eigenvectors or condition numbers */ +/* > have been computed; elements 1:ILO-1 and i+1:N of WR */ +/* > and WI contain eigenvalues which have converged. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* @precisions fortran d -> s */ + +/* > \ingroup doubleGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int dgeevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, doublereal *a, integer *lda, doublereal *wr, + doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, + integer *ldvr, integer *ilo, integer *ihi, doublereal *scale, + doublereal *abnrm, doublereal *rconde, doublereal *rcondv, doublereal + *work, integer *lwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + doublereal d__1, d__2; + + /* Local variables */ + char side[1]; + doublereal anrm; + integer ierr, itau; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer iwrk, nout; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + integer i__, k; + doublereal r__; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + integer icond; + extern logical lsame_(char *, char *); + extern doublereal dlapy2_(doublereal *, doublereal *); + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_( + char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + dgebal_(char *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *); + doublereal cs; + logical scalea; + extern doublereal dlamch_(char *); + doublereal cscale; + extern doublereal dlange_(char *, integer *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + doublereal sn; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), xerbla_(char *, integer *, ftnlen); + logical select[1]; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + doublereal bignum; + extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dhseqr_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), dtrsna_(char *, char *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, integer *); + integer minwrk, maxwrk; + logical wantvl, wntsnb; + integer hswork; + logical wntsne; + doublereal smlnum; + logical lquery, wantvr, wntsnn, wntsnv; + extern /* Subroutine */ int dtrevc3_(char *, char *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *, integer + *); + char job[1]; + doublereal scl, dum[1], eps; + integer lwork_trevc__; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1 * 1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1 * 1; + vr -= vr_offset; + --scale; + --rconde; + --rcondv; + --work; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + wntsnn = lsame_(sense, "N"); + wntsne = lsame_(sense, "E"); + wntsnv = lsame_(sense, "V"); + wntsnb = lsame_(sense, "B"); + if (! (lsame_(balanc, "N") || lsame_(balanc, "S") || lsame_(balanc, "P") + || lsame_(balanc, "B"))) { + *info = -1; + } else if (! wantvl && ! lsame_(jobvl, "N")) { + *info = -2; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -3; + } else if (! (wntsnn || wntsne || wntsnb || wntsnv) || (wntsne || wntsnb) + && ! (wantvl && wantvr)) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -11; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -13; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. */ +/* HSWORK refers to the workspace preferred by DHSEQR, as */ +/* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ +/* the worst case.) */ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = *n + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, n, & + c__0, (ftnlen)6, (ftnlen)1); + + if (wantvl) { + dtrevc3_("L", "B", select, n, &a[a_offset], lda, &vl[ + vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, & + work[1], &c_n1, &ierr); + lwork_trevc__ = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + lwork_trevc__; + maxwrk = f2cmax(i__1,i__2); + dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vl[vl_offset], ldvl, &work[1], &c_n1, info); + } else if (wantvr) { + dtrevc3_("R", "B", select, n, &a[a_offset], lda, &vl[ + vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, & + work[1], &c_n1, &ierr); + lwork_trevc__ = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + lwork_trevc__; + maxwrk = f2cmax(i__1,i__2); + dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + } else { + if (wntsnn) { + dhseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], + &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, + info); + } else { + dhseqr_("S", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], + &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, + info); + } + } + hswork = (integer) work[1]; + + if (! wantvl && ! wantvr) { + minwrk = *n << 1; + if (! wntsnn) { +/* Computing MAX */ + i__1 = minwrk, i__2 = *n * *n + *n * 6; + minwrk = f2cmax(i__1,i__2); + } + maxwrk = f2cmax(maxwrk,hswork); + if (! wntsnn) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * *n + *n * 6; + maxwrk = f2cmax(i__1,i__2); + } + } else { + minwrk = *n * 3; + if (! wntsnn && ! wntsne) { +/* Computing MAX */ + i__1 = minwrk, i__2 = *n * *n + *n * 6; + minwrk = f2cmax(i__1,i__2); + } + maxwrk = f2cmax(maxwrk,hswork); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "DORGHR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); + if (! wntsnn && ! wntsne) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * *n + *n * 6; + maxwrk = f2cmax(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3; + maxwrk = f2cmax(i__1,i__2); + } + maxwrk = f2cmax(maxwrk,minwrk); + } + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -21; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEEVX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("P"); + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + icond = 0; + anrm = dlange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0. && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* Balance the matrix and compute ABNRM */ + + dgebal_(balanc, n, &a[a_offset], lda, ilo, ihi, &scale[1], &ierr); + *abnrm = dlange_("1", n, n, &a[a_offset], lda, dum); + if (scalea) { + dum[0] = *abnrm; + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, &c__1, & + ierr); + *abnrm = dum[0]; + } + +/* Reduce to upper Hessenberg form */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + itau = 1; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + dgehrd_(n, ilo, ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, & + ierr); + + if (wantvl) { + +/* Want left eigenvectors */ +/* Copy Householder vectors to VL */ + + *(unsigned char *)side = 'L'; + dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* Generate orthogonal matrix in VL */ +/* (Workspace: need 2*N-1, prefer N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + dorghr_(n, ilo, ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], & + i__1, &ierr); + +/* Perform QR iteration, accumulating Schur vectors in VL */ +/* (Workspace: need 1, prefer HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vl[ + vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* Want left and right eigenvectors */ +/* Copy Schur vectors to VR */ + + *(unsigned char *)side = 'B'; + dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* Want right eigenvectors */ +/* Copy Householder vectors to VR */ + + *(unsigned char *)side = 'R'; + dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* Generate orthogonal matrix in VR */ +/* (Workspace: need 2*N-1, prefer N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + dorghr_(n, ilo, ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], & + i__1, &ierr); + +/* Perform QR iteration, accumulating Schur vectors in VR */ +/* (Workspace: need 1, prefer HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* Compute eigenvalues only */ +/* If condition numbers desired, compute Schur form */ + + if (wntsnn) { + *(unsigned char *)job = 'E'; + } else { + *(unsigned char *)job = 'S'; + } + +/* (Workspace: need 1, prefer HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_(job, "N", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO .NE. 0 from DHSEQR, then quit */ + + if (*info != 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* Compute left and/or right eigenvectors */ +/* (Workspace: need 3*N, prefer N + 2*N*NB) */ + + i__1 = *lwork - iwrk + 1; + dtrevc3_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], + ldvl, &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &i__1, & + ierr); + } + +/* Compute condition numbers if desired */ +/* (Workspace: need N*N+6*N unless SENSE = 'E') */ + + if (! wntsnn) { + dtrsna_(sense, "A", select, n, &a[a_offset], lda, &vl[vl_offset], + ldvl, &vr[vr_offset], ldvr, &rconde[1], &rcondv[1], n, &nout, + &work[iwrk], n, &iwork[1], &icond); + } + + if (wantvl) { + +/* Undo balancing of left eigenvectors */ + + dgebak_(balanc, "L", n, ilo, ihi, &scale[1], n, &vl[vl_offset], ldvl, + &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1. / dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + d__1 = vl[k + i__ * vl_dim1]; +/* Computing 2nd power */ + d__2 = vl[k + (i__ + 1) * vl_dim1]; + work[k] = d__1 * d__1 + d__2 * d__2; +/* L10: */ + } + k = idamax_(n, &work[1], &c__1); + dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], + &cs, &sn, &r__); + drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * + vl_dim1 + 1], &c__1, &cs, &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.; + } +/* L20: */ + } + } + + if (wantvr) { + +/* Undo balancing of right eigenvectors */ + + dgebak_(balanc, "R", n, ilo, ihi, &scale[1], n, &vr[vr_offset], ldvr, + &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1. / dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + d__1 = vr[k + i__ * vr_dim1]; +/* Computing 2nd power */ + d__2 = vr[k + (i__ + 1) * vr_dim1]; + work[k] = d__1 * d__1 + d__2 * d__2; +/* L30: */ + } + k = idamax_(n, &work[1], &c__1); + dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], + &cs, &sn, &r__); + drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * + vr_dim1 + 1], &c__1, &cs, &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.; + } +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = f2cmax(i__3,1); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + + 1], &i__2, &ierr); + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = f2cmax(i__3,1); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + + 1], &i__2, &ierr); + if (*info == 0) { + if ((wntsnv || wntsnb) && icond == 0) { + dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &rcondv[ + 1], n, &ierr); + } + } else { + i__1 = *ilo - 1; + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], + n, &ierr); + i__1 = *ilo - 1; + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], + n, &ierr); + } + } + + work[1] = (doublereal) maxwrk; + return 0; + +/* End of DGEEVX */ + +} /* dgeevx_ */ + diff --git a/lapack-netlib/SRC/dgehd2.c b/lapack-netlib/SRC/dgehd2.c new file mode 100644 index 000000000..27e90b1e7 --- /dev/null +++ b/lapack-netlib/SRC/dgehd2.c @@ -0,0 +1,628 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEHD2 reduces a general square matrix to upper Hessenberg form using an unblocked algorithm. +*/ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEHD2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) */ + +/* INTEGER IHI, ILO, INFO, LDA, N */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEHD2 reduces a real general matrix A to upper Hessenberg form H by */ +/* > an orthogonal similarity transformation: Q**T * A * Q = H . */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > */ +/* > It is assumed that A is already upper triangular in rows */ +/* > and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */ +/* > set by a previous call to DGEBAL; otherwise they should be */ +/* > set to 1 and N respectively. See Further Details. */ +/* > 1 <= ILO <= IHI <= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the n by n general matrix to be reduced. */ +/* > On exit, the upper triangle and the first subdiagonal of A */ +/* > are overwritten with the upper Hessenberg matrix H, and the */ +/* > elements below the first subdiagonal, with the array TAU, */ +/* > represent the orthogonal matrix Q as a product of elementary */ +/* > reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (N-1) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of (ihi-ilo) elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(ilo) H(ilo+1) . . . H(ihi-1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */ +/* > exit in A(i+2:ihi,i), and tau in TAU(i). */ +/* > */ +/* > The contents of A are illustrated by the following example, with */ +/* > n = 7, ilo = 2 and ihi = 6: */ +/* > */ +/* > on entry, on exit, */ +/* > */ +/* > ( a a a a a a a ) ( a a h h h h a ) */ +/* > ( a a a a a a ) ( a h h h h a ) */ +/* > ( a a a a a a ) ( h h h h h h ) */ +/* > ( a a a a a a ) ( v2 h h h h h ) */ +/* > ( a a a a a a ) ( v2 v3 h h h h ) */ +/* > ( a a a a a a ) ( v2 v3 v4 h h h ) */ +/* > ( a ) ( a ) */ +/* > */ +/* > where a denotes an element of the original matrix A, h denotes a */ +/* > modified element of the upper Hessenberg matrix H, and vi denotes an */ +/* > element of the vector defining H(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgehd2_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen); + doublereal aii; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) { + *info = -2; + } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEHD2", &i__1, (ftnlen)6); + return 0; + } + + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + +/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ + + i__2 = *ihi - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[f2cmin(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + aii = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ + + i__2 = *ihi - i__; + dlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); + +/* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ + + i__2 = *ihi - i__; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); + + a[i__ + 1 + i__ * a_dim1] = aii; +/* L10: */ + } + + return 0; + +/* End of DGEHD2 */ + +} /* dgehd2_ */ + diff --git a/lapack-netlib/SRC/dgehrd.c b/lapack-netlib/SRC/dgehrd.c new file mode 100644 index 000000000..aff1f6691 --- /dev/null +++ b/lapack-netlib/SRC/dgehrd.c @@ -0,0 +1,789 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEHRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEHRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER IHI, ILO, INFO, LDA, LWORK, N */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEHRD reduces a real general matrix A to upper Hessenberg form H by */ +/* > an orthogonal similarity transformation: Q**T * A * Q = H . */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > */ +/* > It is assumed that A is already upper triangular in rows */ +/* > and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */ +/* > set by a previous call to DGEBAL; otherwise they should be */ +/* > set to 1 and N respectively. See Further Details. */ +/* > 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the N-by-N general matrix to be reduced. */ +/* > On exit, the upper triangle and the first subdiagonal of A */ +/* > are overwritten with the upper Hessenberg matrix H, and the */ +/* > elements below the first subdiagonal, with the array TAU, */ +/* > represent the orthogonal matrix Q as a product of elementary */ +/* > reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (N-1) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to */ +/* > zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (LWORK) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= f2cmax(1,N). */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of (ihi-ilo) elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(ilo) H(ilo+1) . . . H(ihi-1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */ +/* > exit in A(i+2:ihi,i), and tau in TAU(i). */ +/* > */ +/* > The contents of A are illustrated by the following example, with */ +/* > n = 7, ilo = 2 and ihi = 6: */ +/* > */ +/* > on entry, on exit, */ +/* > */ +/* > ( a a a a a a a ) ( a a h h h h a ) */ +/* > ( a a a a a a ) ( a h h h h a ) */ +/* > ( a a a a a a ) ( h h h h h h ) */ +/* > ( a a a a a a ) ( v2 h h h h h ) */ +/* > ( a a a a a a ) ( v2 v3 h h h h ) */ +/* > ( a a a a a a ) ( v2 v3 v4 h h h ) */ +/* > ( a ) ( a ) */ +/* > */ +/* > where a denotes an element of the original matrix A, h denotes a */ +/* > modified element of the upper Hessenberg matrix H, and vi denotes an */ +/* > element of the vector defining H(i). */ +/* > */ +/* > This file is a slight modification of LAPACK-3.0's DGEHRD */ +/* > subroutine incorporating improvements proposed by Quintana-Orti and */ +/* > Van de Geijn (2006). (See DLAHR2.) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgehrd_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer nbmin, iinfo; + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), daxpy_( + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *), dgehd2_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), dlahr2_( + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + integer ib; + doublereal ei; + integer nb, nh; + extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + integer nx; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + integer iwt; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) { + *info = -2; + } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*lwork < f2cmax(1,*n) && ! lquery) { + *info = -8; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + +/* Computing MIN */ + i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = f2cmin(i__1,i__2); + lwkopt = *n * nb + 4160; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEHRD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.; +/* L10: */ + } + i__1 = *n - 1; + for (i__ = f2cmax(1,*ihi); i__ <= i__1; ++i__) { + tau[i__] = 0.; +/* L20: */ + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1] = 1.; + return 0; + } + +/* Determine the block size */ + +/* Computing MIN */ + i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = f2cmin(i__1,i__2); + nbmin = 2; + if (nb > 1 && nb < nh) { + +/* Determine when to cross over from blocked to unblocked code */ +/* (last block is always handled by unblocked code) */ + +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__3, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code */ + + if (*lwork < *n * nb + 4160) { + +/* Not enough workspace to use optimal NB: determine the */ +/* minimum value of NB, and reduce NB or force use of */ +/* unblocked code */ + +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGEHRD", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + if (*lwork >= *n * nbmin + 4160) { + nb = (*lwork - 4160) / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + i__ = *ilo; + + } else { + +/* Use blocked code */ + + iwt = *n * nb + 1; + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - i__; + ib = f2cmin(i__3,i__4); + +/* Reduce columns i:i+ib-1 to Hessenberg form, returning the */ +/* matrices V and T of the block reflector H = I - V*T*V**T */ +/* which performs the reduction, and also the matrix Y = A*V*T */ + + dlahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], & + work[iwt], &c__65, &work[1], &ldwork); + +/* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the */ +/* right, computing A := A - Y * V**T. V(i+ib,ib-1) must be set */ +/* to 1 */ + + ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; + a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.; + i__3 = *ihi - i__ - ib + 1; + dgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b25, & + work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & + c_b26, &a[(i__ + ib) * a_dim1 + 1], lda); + a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; + +/* Apply the block reflector H to A(1:i,i+1:i+ib-1) from the */ +/* right */ + + i__3 = ib - 1; + dtrmm_("Right", "Lower", "Transpose", "Unit", &i__, &i__3, &c_b26, + &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork); + i__3 = ib - 2; + for (j = 0; j <= i__3; ++j) { + daxpy_(&i__, &c_b25, &work[ldwork * j + 1], &c__1, &a[(i__ + + j + 1) * a_dim1 + 1], &c__1); +/* L30: */ + } + +/* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the */ +/* left */ + + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, &work[iwt], & + c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], & + ldwork); +/* L40: */ + } + } + +/* Use unblocked code to reduce the rest of the matrix */ + + dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1] = (doublereal) lwkopt; + + return 0; + +/* End of DGEHRD */ + +} /* dgehrd_ */ + diff --git a/lapack-netlib/SRC/dgejsv.c b/lapack-netlib/SRC/dgejsv.c new file mode 100644 index 000000000..6ba003ef9 --- /dev/null +++ b/lapack-netlib/SRC/dgejsv.c @@ -0,0 +1,2682 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) ceil(w) +#define myhuge_(w) HUGE_VAL +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEJSV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEJSV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, */ +/* M, N, A, LDA, SVA, U, LDU, V, LDV, */ +/* WORK, LWORK, IWORK, INFO ) */ + +/* IMPLICIT NONE */ +/* INTEGER INFO, LDA, LDU, LDV, LWORK, M, N */ +/* DOUBLE PRECISION A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ), */ +/* $ WORK( LWORK ) */ +/* INTEGER IWORK( * ) */ +/* CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEJSV computes the singular value decomposition (SVD) of a real M-by-N */ +/* > matrix [A], where M >= N. The SVD of [A] is written as */ +/* > */ +/* > [A] = [U] * [SIGMA] * [V]^t, */ +/* > */ +/* > where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N */ +/* > diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and */ +/* > [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are */ +/* > the singular values of [A]. The columns of [U] and [V] are the left and */ +/* > the right singular vectors of [A], respectively. The matrices [U] and [V] */ +/* > are computed and stored in the arrays U and V, respectively. The diagonal */ +/* > of [SIGMA] is computed and stored in the array SVA. */ +/* > DGEJSV can sometimes compute tiny singular values and their singular vectors much */ +/* > more accurately than other SVD routines, see below under Further Details. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBA */ +/* > \verbatim */ +/* > JOBA is CHARACTER*1 */ +/* > Specifies the level of accuracy: */ +/* > = 'C': This option works well (high relative accuracy) if A = B * D, */ +/* > with well-conditioned B and arbitrary diagonal matrix D. */ +/* > The accuracy cannot be spoiled by COLUMN scaling. The */ +/* > accuracy of the computed output depends on the condition of */ +/* > B, and the procedure aims at the best theoretical accuracy. */ +/* > The relative error max_{i=1:N}|d sigma_i| / sigma_i is */ +/* > bounded by f(M,N)*epsilon* cond(B), independent of D. */ +/* > The input matrix is preprocessed with the QRF with column */ +/* > pivoting. This initial preprocessing and preconditioning by */ +/* > a rank revealing QR factorization is common for all values of */ +/* > JOBA. Additional actions are specified as follows: */ +/* > = 'E': Computation as with 'C' with an additional estimate of the */ +/* > condition number of B. It provides a realistic error bound. */ +/* > = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings */ +/* > D1, D2, and well-conditioned matrix C, this option gives */ +/* > higher accuracy than the 'C' option. If the structure of the */ +/* > input matrix is not known, and relative accuracy is */ +/* > desirable, then this option is advisable. The input matrix A */ +/* > is preprocessed with QR factorization with FULL (row and */ +/* > column) pivoting. */ +/* > = 'G': Computation as with 'F' with an additional estimate of the */ +/* > condition number of B, where A=D*B. If A has heavily weighted */ +/* > rows, then using this condition number gives too pessimistic */ +/* > error bound. */ +/* > = 'A': Small singular values are the noise and the matrix is treated */ +/* > as numerically rank deficient. The error in the computed */ +/* > singular values is bounded by f(m,n)*epsilon*||A||. */ +/* > The computed SVD A = U * S * V^t restores A up to */ +/* > f(m,n)*epsilon*||A||. */ +/* > This gives the procedure the licence to discard (set to zero) */ +/* > all singular values below N*epsilon*||A||. */ +/* > = 'R': Similar as in 'A'. Rank revealing property of the initial */ +/* > QR factorization is used do reveal (using triangular factor) */ +/* > a gap sigma_{r+1} < epsilon * sigma_r in which case the */ +/* > numerical RANK is declared to be r. The SVD is computed with */ +/* > absolute error bounds, but more accurately than with 'A'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > Specifies whether to compute the columns of U: */ +/* > = 'U': N columns of U are returned in the array U. */ +/* > = 'F': full set of M left sing. vectors is returned in the array U. */ +/* > = 'W': U may be used as workspace of length M*N. See the description */ +/* > of U. */ +/* > = 'N': U is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > Specifies whether to compute the matrix V: */ +/* > = 'V': N columns of V are returned in the array V; Jacobi rotations */ +/* > are not explicitly accumulated. */ +/* > = 'J': N columns of V are returned in the array V, but they are */ +/* > computed as the product of Jacobi rotations. This option is */ +/* > allowed only if JOBU .NE. 'N', i.e. in computing the full SVD. */ +/* > = 'W': V may be used as workspace of length N*N. See the description */ +/* > of V. */ +/* > = 'N': V is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBR */ +/* > \verbatim */ +/* > JOBR is CHARACTER*1 */ +/* > Specifies the RANGE for the singular values. Issues the licence to */ +/* > set to zero small positive singular values if they are outside */ +/* > specified range. If A .NE. 0 is scaled so that the largest singular */ +/* > value of c*A is around DSQRT(BIG), BIG=SLAMCH('O'), then JOBR issues */ +/* > the licence to kill columns of A whose norm in c*A is less than */ +/* > DSQRT(SFMIN) (for JOBR = 'R'), or less than SMALL=SFMIN/EPSLN, */ +/* > where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E'). */ +/* > = 'N': Do not kill small columns of c*A. This option assumes that */ +/* > BLAS and QR factorizations and triangular solvers are */ +/* > implemented to work in that range. If the condition of A */ +/* > is greater than BIG, use DGESVJ. */ +/* > = 'R': RESTRICTED range for sigma(c*A) is [DSQRT(SFMIN), DSQRT(BIG)] */ +/* > (roughly, as described above). This option is recommended. */ +/* > ~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +/* > For computing the singular values in the FULL range [SFMIN,BIG] */ +/* > use DGESVJ. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBT */ +/* > \verbatim */ +/* > JOBT is CHARACTER*1 */ +/* > If the matrix is square then the procedure may determine to use */ +/* > transposed A if A^t seems to be better with respect to convergence. */ +/* > If the matrix is not square, JOBT is ignored. This is subject to */ +/* > changes in the future. */ +/* > The decision is based on two values of entropy over the adjoint */ +/* > orbit of A^t * A. See the descriptions of WORK(6) and WORK(7). */ +/* > = 'T': transpose if entropy test indicates possibly faster */ +/* > convergence of Jacobi process if A^t is taken as input. If A is */ +/* > replaced with A^t, then the row pivoting is included automatically. */ +/* > = 'N': do not speculate. */ +/* > This option can be used to compute only the singular values, or the */ +/* > full SVD (U, SIGMA and V). For only one set of singular vectors */ +/* > (U or V), the caller should provide both U and V, as one of the */ +/* > matrices is used as workspace if the matrix A is transposed. */ +/* > The implementer can easily remove this constraint and make the */ +/* > code more complicated. See the descriptions of U and V. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBP */ +/* > \verbatim */ +/* > JOBP is CHARACTER*1 */ +/* > Issues the licence to introduce structured perturbations to drown */ +/* > denormalized numbers. This licence should be active if the */ +/* > denormals are poorly implemented, causing slow computation, */ +/* > especially in cases of fast convergence (!). For details see [1,2]. */ +/* > For the sake of simplicity, this perturbations are included only */ +/* > when the full SVD or only the singular values are requested. The */ +/* > implementer/user can easily add the perturbation for the cases of */ +/* > computing one set of singular vectors. */ +/* > = 'P': introduce perturbation */ +/* > = 'N': do not perturb */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SVA */ +/* > \verbatim */ +/* > SVA is DOUBLE PRECISION array, dimension (N) */ +/* > On exit, */ +/* > - For WORK(1)/WORK(2) = ONE: The singular values of A. During the */ +/* > computation SVA contains Euclidean column norms of the */ +/* > iterated matrices in the array A. */ +/* > - For WORK(1) .NE. WORK(2): The singular values of A are */ +/* > (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if */ +/* > sigma_max(A) overflows or if small singular values have been */ +/* > saved from underflow by scaling the input matrix A. */ +/* > - If JOBR='R' then some of the singular values may be returned */ +/* > as exact zeros obtained by "set to zero" because they are */ +/* > below the numerical rank threshold or are denormalized numbers. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is DOUBLE PRECISION array, dimension ( LDU, N ) */ +/* > If JOBU = 'U', then U contains on exit the M-by-N matrix of */ +/* > the left singular vectors. */ +/* > If JOBU = 'F', then U contains on exit the M-by-M matrix of */ +/* > the left singular vectors, including an ONB */ +/* > of the orthogonal complement of the Range(A). */ +/* > If JOBU = 'W' .AND. (JOBV = 'V' .AND. JOBT = 'T' .AND. M = N), */ +/* > then U is used as workspace if the procedure */ +/* > replaces A with A^t. In that case, [V] is computed */ +/* > in U as left singular vectors of A^t and then */ +/* > copied back to the V array. This 'W' option is just */ +/* > a reminder to the caller that in this case U is */ +/* > reserved as workspace of length N*N. */ +/* > If JOBU = 'N' U is not referenced, unless JOBT='T'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U, LDU >= 1. */ +/* > IF JOBU = 'U' or 'F' or 'W', then LDU >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] V */ +/* > \verbatim */ +/* > V is DOUBLE PRECISION array, dimension ( LDV, N ) */ +/* > If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of */ +/* > the right singular vectors; */ +/* > If JOBV = 'W', AND (JOBU = 'U' AND JOBT = 'T' AND M = N), */ +/* > then V is used as workspace if the pprocedure */ +/* > replaces A with A^t. In that case, [U] is computed */ +/* > in V as right singular vectors of A^t and then */ +/* > copied back to the U array. This 'W' option is just */ +/* > a reminder to the caller that in this case V is */ +/* > reserved as workspace of length N*N. */ +/* > If JOBV = 'N' V is not referenced, unless JOBT='T'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V, LDV >= 1. */ +/* > If JOBV = 'V' or 'J' or 'W', then LDV >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (LWORK) */ +/* > On exit, if N > 0 .AND. M > 0 (else not referenced), */ +/* > WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such */ +/* > that SCALE*SVA(1:N) are the computed singular values */ +/* > of A. (See the description of SVA().) */ +/* > WORK(2) = See the description of WORK(1). */ +/* > WORK(3) = SCONDA is an estimate for the condition number of */ +/* > column equilibrated A. (If JOBA = 'E' or 'G') */ +/* > SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1). */ +/* > It is computed using DPOCON. It holds */ +/* > N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */ +/* > where R is the triangular factor from the QRF of A. */ +/* > However, if R is truncated and the numerical rank is */ +/* > determined to be strictly smaller than N, SCONDA is */ +/* > returned as -1, thus indicating that the smallest */ +/* > singular values might be lost. */ +/* > */ +/* > If full SVD is needed, the following two condition numbers are */ +/* > useful for the analysis of the algorithm. They are provied for */ +/* > a developer/implementer who is familiar with the details of */ +/* > the method. */ +/* > */ +/* > WORK(4) = an estimate of the scaled condition number of the */ +/* > triangular factor in the first QR factorization. */ +/* > WORK(5) = an estimate of the scaled condition number of the */ +/* > triangular factor in the second QR factorization. */ +/* > The following two parameters are computed if JOBT = 'T'. */ +/* > They are provided for a developer/implementer who is familiar */ +/* > with the details of the method. */ +/* > */ +/* > WORK(6) = the entropy of A^t*A :: this is the Shannon entropy */ +/* > of diag(A^t*A) / Trace(A^t*A) taken as point in the */ +/* > probability simplex. */ +/* > WORK(7) = the entropy of A*A^t. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > Length of WORK to confirm proper allocation of work space. */ +/* > LWORK depends on the job: */ +/* > */ +/* > If only SIGMA is needed (JOBU = 'N', JOBV = 'N') and */ +/* > -> .. no scaled condition estimate required (JOBE = 'N'): */ +/* > LWORK >= f2cmax(2*M+N,4*N+1,7). This is the minimal requirement. */ +/* > ->> For optimal performance (blocked code) the optimal value */ +/* > is LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal */ +/* > block size for DGEQP3 and DGEQRF. */ +/* > In general, optimal LWORK is computed as */ +/* > LWORK >= f2cmax(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), 7). */ +/* > -> .. an estimate of the scaled condition number of A is */ +/* > required (JOBA='E', 'G'). In this case, LWORK is the maximum */ +/* > of the above and N*N+4*N, i.e. LWORK >= f2cmax(2*M+N,N*N+4*N,7). */ +/* > ->> For optimal performance (blocked code) the optimal value */ +/* > is LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB, N*N+4*N, 7). */ +/* > In general, the optimal length LWORK is computed as */ +/* > LWORK >= f2cmax(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), */ +/* > N+N*N+LWORK(DPOCON),7). */ +/* > */ +/* > If SIGMA and the right singular vectors are needed (JOBV = 'V'), */ +/* > -> the minimal requirement is LWORK >= f2cmax(2*M+N,4*N+1,7). */ +/* > -> For optimal performance, LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB,7), */ +/* > where NB is the optimal block size for DGEQP3, DGEQRF, DGELQF, */ +/* > DORMLQ. In general, the optimal length LWORK is computed as */ +/* > LWORK >= f2cmax(2*M+N,N+LWORK(DGEQP3), N+LWORK(DPOCON), */ +/* > N+LWORK(DGELQF), 2*N+LWORK(DGEQRF), N+LWORK(DORMLQ)). */ +/* > */ +/* > If SIGMA and the left singular vectors are needed */ +/* > -> the minimal requirement is LWORK >= f2cmax(2*M+N,4*N+1,7). */ +/* > -> For optimal performance: */ +/* > if JOBU = 'U' :: LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB,7), */ +/* > if JOBU = 'F' :: LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB,N+M*NB,7), */ +/* > where NB is the optimal block size for DGEQP3, DGEQRF, DORMQR. */ +/* > In general, the optimal length LWORK is computed as */ +/* > LWORK >= f2cmax(2*M+N,N+LWORK(DGEQP3),N+LWORK(DPOCON), */ +/* > 2*N+LWORK(DGEQRF), N+LWORK(DORMQR)). */ +/* > Here LWORK(DORMQR) equals N*NB (for JOBU = 'U') or */ +/* > M*NB (for JOBU = 'F'). */ +/* > */ +/* > If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and */ +/* > -> if JOBV = 'V' */ +/* > the minimal requirement is LWORK >= f2cmax(2*M+N,6*N+2*N*N). */ +/* > -> if JOBV = 'J' the minimal requirement is */ +/* > LWORK >= f2cmax(2*M+N, 4*N+N*N,2*N+N*N+6). */ +/* > -> For optimal performance, LWORK should be additionally */ +/* > larger than N+M*NB, where NB is the optimal block size */ +/* > for DORMQR. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (M+3*N). */ +/* > On exit, */ +/* > IWORK(1) = the numerical rank determined after the initial */ +/* > QR factorization with pivoting. See the descriptions */ +/* > of JOBA and JOBR. */ +/* > IWORK(2) = the number of the computed nonzero singular values */ +/* > IWORK(3) = if nonzero, a warning message: */ +/* > If IWORK(3) = 1 then some of the column norms of A */ +/* > were denormalized floats. The requested high accuracy */ +/* > is not warranted by the data. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > < 0: if INFO = -i, then the i-th argument had an illegal value. */ +/* > = 0: successful exit; */ +/* > > 0: DGEJSV did not converge in the maximal allowed number */ +/* > of sweeps. The computed values may be inaccurate. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup doubleGEsing */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEJSV implements a preconditioned Jacobi SVD algorithm. It uses DGEQP3, */ +/* > DGEQRF, and DGELQF as preprocessors and preconditioners. Optionally, an */ +/* > additional row pivoting can be used as a preprocessor, which in some */ +/* > cases results in much higher accuracy. An example is matrix A with the */ +/* > structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned */ +/* > diagonal matrices and C is well-conditioned matrix. In that case, complete */ +/* > pivoting in the first QR factorizations provides accuracy dependent on the */ +/* > condition number of C, and independent of D1, D2. Such higher accuracy is */ +/* > not completely understood theoretically, but it works well in practice. */ +/* > Further, if A can be written as A = B*D, with well-conditioned B and some */ +/* > diagonal D, then the high accuracy is guaranteed, both theoretically and */ +/* > in software, independent of D. For more details see [1], [2]. */ +/* > The computational range for the singular values can be the full range */ +/* > ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS */ +/* > & LAPACK routines called by DGEJSV are implemented to work in that range. */ +/* > If that is not the case, then the restriction for safe computation with */ +/* > the singular values in the range of normalized IEEE numbers is that the */ +/* > spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not */ +/* > overflow. This code (DGEJSV) is best used in this restricted range, */ +/* > meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are */ +/* > returned as zeros. See JOBR for details on this. */ +/* > Further, this implementation is somewhat slower than the one described */ +/* > in [1,2] due to replacement of some non-LAPACK components, and because */ +/* > the choice of some tuning parameters in the iterative part (DGESVJ) is */ +/* > left to the implementer on a particular machine. */ +/* > The rank revealing QR factorization (in this code: DGEQP3) should be */ +/* > implemented as in [3]. We have a new version of DGEQP3 under development */ +/* > that is more robust than the current one in LAPACK, with a cleaner cut in */ +/* > rank deficient cases. It will be available in the SIGMA library [4]. */ +/* > If M is much larger than N, it is obvious that the initial QRF with */ +/* > column pivoting can be preprocessed by the QRF without pivoting. That */ +/* > well known trick is not used in DGEJSV because in some cases heavy row */ +/* > weighting can be treated with complete pivoting. The overhead in cases */ +/* > M much larger than N is then only due to pivoting, but the benefits in */ +/* > terms of accuracy have prevailed. The implementer/user can incorporate */ +/* > this extra QRF step easily. The implementer can also improve data movement */ +/* > (matrix transpose, matrix copy, matrix transposed copy) - this */ +/* > implementation of DGEJSV uses only the simplest, naive data movement. */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */ + +/* > \par References: */ +/* ================ */ +/* > */ +/* > \verbatim */ +/* > */ +/* > [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. */ +/* > SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. */ +/* > LAPACK Working note 169. */ +/* > [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. */ +/* > SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. */ +/* > LAPACK Working note 170. */ +/* > [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR */ +/* > factorization software - a case study. */ +/* > ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28. */ +/* > LAPACK Working note 176. */ +/* > [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, */ +/* > QSVD, (H,K)-SVD computations. */ +/* > Department of Mathematics, University of Zagreb, 2008. */ +/* > \endverbatim */ + +/* > \par Bugs, examples and comments: */ +/* ================================= */ +/* > */ +/* > Please report all bugs and send interesting examples and/or comments to */ +/* > drmac@math.hr. Thank you. */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgejsv_(char *joba, char *jobu, char *jobv, char *jobr, + char *jobt, char *jobp, integer *m, integer *n, doublereal *a, + integer *lda, doublereal *sva, doublereal *u, integer *ldu, + doublereal *v, integer *ldv, doublereal *work, integer *lwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, + i__3, i__4, i__5, i__6, i__7, i__8, i__9, i__10, i__11, i__12; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + logical defr; + doublereal aapp, aaqq; + logical kill; + integer ierr; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + doublereal temp1; + integer p, q; + logical jracc; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + doublereal small, entra, sfmin; + logical lsvec; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + doublereal epsln; + logical rsvec; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + integer n1; + logical l2aber; + extern /* Subroutine */ int dgeqp3_(integer *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, integer *, + integer *); + doublereal condr1, condr2, uscal1, uscal2; + logical l2kill, l2rank, l2tran, l2pert; + extern doublereal dlamch_(char *); + integer nr; + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); + doublereal scalem; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + doublereal sconda; + logical goscal; + doublereal aatmin; + extern /* Subroutine */ int dgeqrf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + doublereal aatmax; + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), xerbla_(char *, integer *, ftnlen); + logical noscal; + extern /* Subroutine */ int dpocon_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + integer *), dgesvj_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), dlassq_(integer *, doublereal *, integer + *, doublereal *, doublereal *), dlaswp_(integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *); + doublereal entrat; + logical almort; + extern /* Subroutine */ int dorgqr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dormlq_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + doublereal maxprj; + logical errest; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + logical transp, rowpiv; + doublereal big, cond_ok__, xsc, big1; + integer warning, numrank; + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* =========================================================================== */ + + + +/* Test the input arguments */ + + /* Parameter adjustments */ + --sva; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + --work; + --iwork; + + /* Function Body */ + lsvec = lsame_(jobu, "U") || lsame_(jobu, "F"); + jracc = lsame_(jobv, "J"); + rsvec = lsame_(jobv, "V") || jracc; + rowpiv = lsame_(joba, "F") || lsame_(joba, "G"); + l2rank = lsame_(joba, "R"); + l2aber = lsame_(joba, "A"); + errest = lsame_(joba, "E") || lsame_(joba, "G"); + l2tran = lsame_(jobt, "T"); + l2kill = lsame_(jobr, "R"); + defr = lsame_(jobr, "N"); + l2pert = lsame_(jobp, "P"); + + if (! (rowpiv || l2rank || l2aber || errest || lsame_(joba, "C"))) { + *info = -1; + } else if (! (lsvec || lsame_(jobu, "N") || lsame_( + jobu, "W"))) { + *info = -2; + } else if (! (rsvec || lsame_(jobv, "N") || lsame_( + jobv, "W")) || jracc && ! lsvec) { + *info = -3; + } else if (! (l2kill || defr)) { + *info = -4; + } else if (! (l2tran || lsame_(jobt, "N"))) { + *info = -5; + } else if (! (l2pert || lsame_(jobp, "N"))) { + *info = -6; + } else if (*m < 0) { + *info = -7; + } else if (*n < 0 || *n > *m) { + *info = -8; + } else if (*lda < *m) { + *info = -10; + } else if (lsvec && *ldu < *m) { + *info = -13; + } else if (rsvec && *ldv < *n) { + *info = -15; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 7, i__2 = (*n << 2) + 1, i__1 = f2cmax(i__1,i__2), i__2 = (*m << + 1) + *n; +/* Computing MAX */ + i__3 = 7, i__4 = (*n << 2) + *n * *n, i__3 = f2cmax(i__3,i__4), i__4 = (* + m << 1) + *n; +/* Computing MAX */ + i__5 = 7, i__6 = (*m << 1) + *n, i__5 = f2cmax(i__5,i__6), i__6 = (*n << + 2) + 1; +/* Computing MAX */ + i__7 = 7, i__8 = (*m << 1) + *n, i__7 = f2cmax(i__7,i__8), i__8 = (*n << + 2) + 1; +/* Computing MAX */ + i__9 = (*m << 1) + *n, i__10 = *n * 6 + (*n << 1) * *n; +/* Computing MAX */ + i__11 = (*m << 1) + *n, i__12 = (*n << 2) + *n * *n, i__11 = f2cmax( + i__11,i__12), i__12 = (*n << 1) + *n * *n + 6; + if (! (lsvec || rsvec || errest) && *lwork < f2cmax(i__1,i__2) || ! ( + lsvec || rsvec) && errest && *lwork < f2cmax(i__3,i__4) || lsvec + && ! rsvec && *lwork < f2cmax(i__5,i__6) || rsvec && ! lsvec && * + lwork < f2cmax(i__7,i__8) || lsvec && rsvec && ! jracc && *lwork + < f2cmax(i__9,i__10) || lsvec && rsvec && jracc && *lwork < f2cmax( + i__11,i__12)) { + *info = -17; + } else { +/* #:) */ + *info = 0; + } + } + + if (*info != 0) { +/* #:( */ + i__1 = -(*info); + xerbla_("DGEJSV", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return for void matrix (Y3K safe) */ +/* #:) */ + if (*m == 0 || *n == 0) { + iwork[1] = 0; + iwork[2] = 0; + iwork[3] = 0; + work[1] = 0.; + work[2] = 0.; + work[3] = 0.; + work[4] = 0.; + work[5] = 0.; + work[6] = 0.; + work[7] = 0.; + return 0; + } + +/* Determine whether the matrix U should be M x N or M x M */ + + if (lsvec) { + n1 = *n; + if (lsame_(jobu, "F")) { + n1 = *m; + } + } + +/* Set numerical parameters */ + +/* ! NOTE: Make sure DLAMCH() does not fail on the target architecture. */ + + epsln = dlamch_("Epsilon"); + sfmin = dlamch_("SafeMinimum"); + small = sfmin / epsln; + big = dlamch_("O"); +/* BIG = ONE / SFMIN */ + +/* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N */ + +/* (!) If necessary, scale SVA() to protect the largest norm from */ +/* overflow. It is possible that this scaling pushes the smallest */ +/* column norm left from the underflow threshold (extreme case). */ + + scalem = 1. / sqrt((doublereal) (*m) * (doublereal) (*n)); + noscal = TRUE_; + goscal = TRUE_; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.; + aaqq = 1.; + dlassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -9; + i__2 = -(*info); + xerbla_("DGEJSV", &i__2, (ftnlen)6); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscal) { + sva[p] = aapp * aaqq; + } else { + noscal = FALSE_; + sva[p] = aapp * (aaqq * scalem); + if (goscal) { + goscal = FALSE_; + i__2 = p - 1; + dscal_(&i__2, &scalem, &sva[1], &c__1); + } + } +/* L1874: */ + } + + if (noscal) { + scalem = 1.; + } + + aapp = 0.; + aaqq = big; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { +/* Computing MAX */ + d__1 = aapp, d__2 = sva[p]; + aapp = f2cmax(d__1,d__2); + if (sva[p] != 0.) { +/* Computing MIN */ + d__1 = aaqq, d__2 = sva[p]; + aaqq = f2cmin(d__1,d__2); + } +/* L4781: */ + } + +/* Quick return for zero M x N matrix */ +/* #:) */ + if (aapp == 0.) { + if (lsvec) { + dlaset_("G", m, &n1, &c_b34, &c_b35, &u[u_offset], ldu) + ; + } + if (rsvec) { + dlaset_("G", n, n, &c_b34, &c_b35, &v[v_offset], ldv); + } + work[1] = 1.; + work[2] = 1.; + if (errest) { + work[3] = 1.; + } + if (lsvec && rsvec) { + work[4] = 1.; + work[5] = 1.; + } + if (l2tran) { + work[6] = 0.; + work[7] = 0.; + } + iwork[1] = 0; + iwork[2] = 0; + iwork[3] = 0; + return 0; + } + +/* Issue warning if denormalized column norms detected. Override the */ +/* high relative accuracy request. Issue licence to kill columns */ +/* (set them to zero) whose norm is less than sigma_max / BIG (roughly). */ +/* #:( */ + warning = 0; + if (aaqq <= sfmin) { + l2rank = TRUE_; + l2kill = TRUE_; + warning = 1; + } + +/* Quick return for one-column matrix */ +/* #:) */ + if (*n == 1) { + + if (lsvec) { + dlascl_("G", &c__0, &c__0, &sva[1], &scalem, m, &c__1, &a[a_dim1 + + 1], lda, &ierr); + dlacpy_("A", m, &c__1, &a[a_offset], lda, &u[u_offset], ldu); +/* computing all M left singular vectors of the M x 1 matrix */ + if (n1 != *n) { + i__1 = *lwork - *n; + dgeqrf_(m, n, &u[u_offset], ldu, &work[1], &work[*n + 1], & + i__1, &ierr); + i__1 = *lwork - *n; + dorgqr_(m, &n1, &c__1, &u[u_offset], ldu, &work[1], &work[*n + + 1], &i__1, &ierr); + dcopy_(m, &a[a_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); + } + } + if (rsvec) { + v[v_dim1 + 1] = 1.; + } + if (sva[1] < big * scalem) { + sva[1] /= scalem; + scalem = 1.; + } + work[1] = 1. / scalem; + work[2] = 1.; + if (sva[1] != 0.) { + iwork[1] = 1; + if (sva[1] / scalem >= sfmin) { + iwork[2] = 1; + } else { + iwork[2] = 0; + } + } else { + iwork[1] = 0; + iwork[2] = 0; + } + iwork[3] = 0; + if (errest) { + work[3] = 1.; + } + if (lsvec && rsvec) { + work[4] = 1.; + work[5] = 1.; + } + if (l2tran) { + work[6] = 0.; + work[7] = 0.; + } + return 0; + + } + + transp = FALSE_; + l2tran = l2tran && *m == *n; + + aatmax = -1.; + aatmin = big; + if (rowpiv || l2tran) { + +/* Compute the row norms, needed to determine row pivoting sequence */ +/* (in the case of heavily row weighted A, row pivoting is strongly */ +/* advised) and to collect information needed to compare the */ +/* structures of A * A^t and A^t * A (in the case L2TRAN.EQ..TRUE.). */ + + if (l2tran) { + i__1 = *m; + for (p = 1; p <= i__1; ++p) { + xsc = 0.; + temp1 = 1.; + dlassq_(n, &a[p + a_dim1], lda, &xsc, &temp1); +/* DLASSQ gets both the ell_2 and the ell_infinity norm */ +/* in one pass through the vector */ + work[*m + *n + p] = xsc * scalem; + work[*n + p] = xsc * (scalem * sqrt(temp1)); +/* Computing MAX */ + d__1 = aatmax, d__2 = work[*n + p]; + aatmax = f2cmax(d__1,d__2); + if (work[*n + p] != 0.) { +/* Computing MIN */ + d__1 = aatmin, d__2 = work[*n + p]; + aatmin = f2cmin(d__1,d__2); + } +/* L1950: */ + } + } else { + i__1 = *m; + for (p = 1; p <= i__1; ++p) { + work[*m + *n + p] = scalem * (d__1 = a[p + idamax_(n, &a[p + + a_dim1], lda) * a_dim1], abs(d__1)); +/* Computing MAX */ + d__1 = aatmax, d__2 = work[*m + *n + p]; + aatmax = f2cmax(d__1,d__2); +/* Computing MIN */ + d__1 = aatmin, d__2 = work[*m + *n + p]; + aatmin = f2cmin(d__1,d__2); +/* L1904: */ + } + } + + } + +/* For square matrix A try to determine whether A^t would be better */ +/* input for the preconditioned Jacobi SVD, with faster convergence. */ +/* The decision is based on an O(N) function of the vector of column */ +/* and row norms of A, based on the Shannon entropy. This should give */ +/* the right choice in most cases when the difference actually matters. */ +/* It may fail and pick the slower converging side. */ + + entra = 0.; + entrat = 0.; + if (l2tran) { + + xsc = 0.; + temp1 = 1.; + dlassq_(n, &sva[1], &c__1, &xsc, &temp1); + temp1 = 1. / temp1; + + entra = 0.; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { +/* Computing 2nd power */ + d__1 = sva[p] / xsc; + big1 = d__1 * d__1 * temp1; + if (big1 != 0.) { + entra += big1 * log(big1); + } +/* L1113: */ + } + entra = -entra / log((doublereal) (*n)); + +/* Now, SVA().^2/Trace(A^t * A) is a point in the probability simplex. */ +/* It is derived from the diagonal of A^t * A. Do the same with the */ +/* diagonal of A * A^t, compute the entropy of the corresponding */ +/* probability distribution. Note that A * A^t and A^t * A have the */ +/* same trace. */ + + entrat = 0.; + i__1 = *n + *m; + for (p = *n + 1; p <= i__1; ++p) { +/* Computing 2nd power */ + d__1 = work[p] / xsc; + big1 = d__1 * d__1 * temp1; + if (big1 != 0.) { + entrat += big1 * log(big1); + } +/* L1114: */ + } + entrat = -entrat / log((doublereal) (*m)); + +/* Analyze the entropies and decide A or A^t. Smaller entropy */ +/* usually means better input for the algorithm. */ + + transp = entrat < entra; + +/* If A^t is better than A, transpose A. */ + + if (transp) { +/* In an optimal implementation, this trivial transpose */ +/* should be replaced with faster transpose. */ + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + temp1 = a[q + p * a_dim1]; + a[q + p * a_dim1] = a[p + q * a_dim1]; + a[p + q * a_dim1] = temp1; +/* L1116: */ + } +/* L1115: */ + } + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + work[*m + *n + p] = sva[p]; + sva[p] = work[*n + p]; +/* L1117: */ + } + temp1 = aapp; + aapp = aatmax; + aatmax = temp1; + temp1 = aaqq; + aaqq = aatmin; + aatmin = temp1; + kill = lsvec; + lsvec = rsvec; + rsvec = kill; + if (lsvec) { + n1 = *n; + } + + rowpiv = TRUE_; + } + + } +/* END IF L2TRAN */ + +/* Scale the matrix so that its maximal singular value remains less */ +/* than DSQRT(BIG) -- the matrix is scaled so that its maximal column */ +/* has Euclidean norm equal to DSQRT(BIG/N). The only reason to keep */ +/* DSQRT(BIG) instead of BIG is the fact that DGEJSV uses LAPACK and */ +/* BLAS routines that, in some implementations, are not capable of */ +/* working in the full interval [SFMIN,BIG] and that they may provoke */ +/* overflows in the intermediate results. If the singular values spread */ +/* from SFMIN to BIG, then DGESVJ will compute them. So, in that case, */ +/* one should use DGESVJ instead of DGEJSV. */ + + big1 = sqrt(big); + temp1 = sqrt(big / (doublereal) (*n)); + + dlascl_("G", &c__0, &c__0, &aapp, &temp1, n, &c__1, &sva[1], n, &ierr); + if (aaqq > aapp * sfmin) { + aaqq = aaqq / aapp * temp1; + } else { + aaqq = aaqq * temp1 / aapp; + } + temp1 *= scalem; + dlascl_("G", &c__0, &c__0, &aapp, &temp1, m, n, &a[a_offset], lda, &ierr); + +/* To undo scaling at the end of this procedure, multiply the */ +/* computed singular values with USCAL2 / USCAL1. */ + + uscal1 = temp1; + uscal2 = aapp; + + if (l2kill) { +/* L2KILL enforces computation of nonzero singular values in */ +/* the restricted range of condition number of the initial A, */ +/* sigma_max(A) / sigma_min(A) approx. DSQRT(BIG)/DSQRT(SFMIN). */ + xsc = sqrt(sfmin); + } else { + xsc = small; + +/* Now, if the condition number of A is too big, */ +/* sigma_max(A) / sigma_min(A) .GT. DSQRT(BIG/N) * EPSLN / SFMIN, */ +/* as a precaution measure, the full SVD is computed using DGESVJ */ +/* with accumulated Jacobi rotations. This provides numerically */ +/* more robust computation, at the cost of slightly increased run */ +/* time. Depending on the concrete implementation of BLAS and LAPACK */ +/* (i.e. how they behave in presence of extreme ill-conditioning) the */ +/* implementor may decide to remove this switch. */ + if (aaqq < sqrt(sfmin) && lsvec && rsvec) { + jracc = TRUE_; + } + + } + if (aaqq < xsc) { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + if (sva[p] < xsc) { + dlaset_("A", m, &c__1, &c_b34, &c_b34, &a[p * a_dim1 + 1], + lda); + sva[p] = 0.; + } +/* L700: */ + } + } + +/* Preconditioning using QR factorization with pivoting */ + + if (rowpiv) { +/* Optional row permutation (Bjoerck row pivoting): */ +/* A result by Cox and Higham shows that the Bjoerck's */ +/* row pivoting combined with standard column pivoting */ +/* has similar effect as Powell-Reid complete pivoting. */ +/* The ell-infinity norms of A are made nonincreasing. */ + i__1 = *m - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *m - p + 1; + q = idamax_(&i__2, &work[*m + *n + p], &c__1) + p - 1; + iwork[(*n << 1) + p] = q; + if (p != q) { + temp1 = work[*m + *n + p]; + work[*m + *n + p] = work[*m + *n + q]; + work[*m + *n + q] = temp1; + } +/* L1952: */ + } + i__1 = *m - 1; + dlaswp_(n, &a[a_offset], lda, &c__1, &i__1, &iwork[(*n << 1) + 1], & + c__1); + } + +/* End of the preparation phase (scaling, optional sorting and */ +/* transposing, optional flushing of small columns). */ + +/* Preconditioning */ + +/* If the full SVD is needed, the right singular vectors are computed */ +/* from a matrix equation, and for that we need theoretical analysis */ +/* of the Businger-Golub pivoting. So we use DGEQP3 as the first RR QRF. */ +/* In all other cases the first RR QRF can be chosen by other criteria */ +/* (eg speed by replacing global with restricted window pivoting, such */ +/* as in SGEQPX from TOMS # 782). Good results will be obtained using */ +/* SGEQPX with properly (!) chosen numerical parameters. */ +/* Any improvement of DGEQP3 improves overal performance of DGEJSV. */ + +/* A * P1 = Q1 * [ R1^t 0]^t: */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + iwork[p] = 0; +/* L1963: */ + } + i__1 = *lwork - *n; + dgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &work[1], &work[*n + 1], & + i__1, &ierr); + +/* The upper triangular matrix R1 from the first QRF is inspected for */ +/* rank deficiency and possibilities for deflation, or possible */ +/* ill-conditioning. Depending on the user specified flag L2RANK, */ +/* the procedure explores possibilities to reduce the numerical */ +/* rank by inspecting the computed upper triangular factor. If */ +/* L2RANK or L2ABER are up, then DGEJSV will compute the SVD of */ +/* A + dA, where ||dA|| <= f(M,N)*EPSLN. */ + + nr = 1; + if (l2aber) { +/* Standard absolute error bound suffices. All sigma_i with */ +/* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an */ +/* aggressive enforcement of lower numerical rank by introducing a */ +/* backward error of the order of N*EPSLN*||A||. */ + temp1 = sqrt((doublereal) (*n)) * epsln; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((d__2 = a[p + p * a_dim1], abs(d__2)) >= temp1 * (d__1 = a[ + a_dim1 + 1], abs(d__1))) { + ++nr; + } else { + goto L3002; + } +/* L3001: */ + } +L3002: + ; + } else if (l2rank) { +/* Sudden drop on the diagonal of R1 is used as the criterion for */ +/* close-to-rank-deficient. */ + temp1 = sqrt(sfmin); + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((d__2 = a[p + p * a_dim1], abs(d__2)) < epsln * (d__1 = a[p - + 1 + (p - 1) * a_dim1], abs(d__1)) || (d__3 = a[p + p * + a_dim1], abs(d__3)) < small || l2kill && (d__4 = a[p + p * + a_dim1], abs(d__4)) < temp1) { + goto L3402; + } + ++nr; +/* L3401: */ + } +L3402: + + ; + } else { +/* The goal is high relative accuracy. However, if the matrix */ +/* has high scaled condition number the relative accuracy is in */ +/* general not feasible. Later on, a condition number estimator */ +/* will be deployed to estimate the scaled condition number. */ +/* Here we just remove the underflowed part of the triangular */ +/* factor. This prevents the situation in which the code is */ +/* working hard to get the accuracy not warranted by the data. */ + temp1 = sqrt(sfmin); + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((d__1 = a[p + p * a_dim1], abs(d__1)) < small || l2kill && ( + d__2 = a[p + p * a_dim1], abs(d__2)) < temp1) { + goto L3302; + } + ++nr; +/* L3301: */ + } +L3302: + + ; + } + + almort = FALSE_; + if (nr == *n) { + maxprj = 1.; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + temp1 = (d__1 = a[p + p * a_dim1], abs(d__1)) / sva[iwork[p]]; + maxprj = f2cmin(maxprj,temp1); +/* L3051: */ + } +/* Computing 2nd power */ + d__1 = maxprj; + if (d__1 * d__1 >= 1. - (doublereal) (*n) * epsln) { + almort = TRUE_; + } + } + + + sconda = -1.; + condr1 = -1.; + condr2 = -1.; + + if (errest) { + if (*n == nr) { + if (rsvec) { + dlacpy_("U", n, n, &a[a_offset], lda, &v[v_offset], ldv); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + temp1 = sva[iwork[p]]; + d__1 = 1. / temp1; + dscal_(&p, &d__1, &v[p * v_dim1 + 1], &c__1); +/* L3053: */ + } + dpocon_("U", n, &v[v_offset], ldv, &c_b35, &temp1, &work[*n + + 1], &iwork[(*n << 1) + *m + 1], &ierr); + } else if (lsvec) { + dlacpy_("U", n, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + temp1 = sva[iwork[p]]; + d__1 = 1. / temp1; + dscal_(&p, &d__1, &u[p * u_dim1 + 1], &c__1); +/* L3054: */ + } + dpocon_("U", n, &u[u_offset], ldu, &c_b35, &temp1, &work[*n + + 1], &iwork[(*n << 1) + *m + 1], &ierr); + } else { + dlacpy_("U", n, n, &a[a_offset], lda, &work[*n + 1], n); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + temp1 = sva[iwork[p]]; + d__1 = 1. / temp1; + dscal_(&p, &d__1, &work[*n + (p - 1) * *n + 1], &c__1); +/* L3052: */ + } + dpocon_("U", n, &work[*n + 1], n, &c_b35, &temp1, &work[*n + * + n * *n + 1], &iwork[(*n << 1) + *m + 1], &ierr); + } + sconda = 1. / sqrt(temp1); +/* SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1). */ +/* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */ + } else { + sconda = -1.; + } + } + + l2pert = l2pert && (d__1 = a[a_dim1 + 1] / a[nr + nr * a_dim1], abs(d__1)) + > sqrt(big1); +/* If there is no violent scaling, artificial perturbation is not needed. */ + +/* Phase 3: */ + + if (! (rsvec || lsvec)) { + +/* Singular Values only */ + +/* Computing MIN */ + i__2 = *n - 1; + i__1 = f2cmin(i__2,nr); + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p; + dcopy_(&i__2, &a[p + (p + 1) * a_dim1], lda, &a[p + 1 + p * + a_dim1], &c__1); +/* L1946: */ + } + +/* The following two DO-loops introduce small relative perturbation */ +/* into the strict upper triangle of the lower triangular matrix. */ +/* Small entries below the main diagonal are also changed. */ +/* This modification is useful if the computing environment does not */ +/* provide/allow FLUSH TO ZERO underflow, for it prevents many */ +/* annoying denormalized numbers in case of strongly scaled matrices. */ +/* The perturbation is structured so that it does not introduce any */ +/* new perturbation of the singular values, and it does not destroy */ +/* the job done by the preconditioner. */ +/* The licence for this perturbation is in the variable L2PERT, which */ +/* should be .FALSE. if FLUSH TO ZERO underflow is active. */ + + if (! almort) { + + if (l2pert) { +/* XSC = DSQRT(SMALL) */ + xsc = epsln / (doublereal) (*n); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + temp1 = xsc * (d__1 = a[q + q * a_dim1], abs(d__1)); + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + if (p > q && (d__1 = a[p + q * a_dim1], abs(d__1)) <= + temp1 || p < q) { + a[p + q * a_dim1] = d_sign(&temp1, &a[p + q * + a_dim1]); + } +/* L4949: */ + } +/* L4947: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &a[(a_dim1 << 1) + + 1], lda); + } + + + i__1 = *lwork - *n; + dgeqrf_(n, &nr, &a[a_offset], lda, &work[1], &work[*n + 1], &i__1, + &ierr); + + i__1 = nr - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p; + dcopy_(&i__2, &a[p + (p + 1) * a_dim1], lda, &a[p + 1 + p * + a_dim1], &c__1); +/* L1948: */ + } + + } + +/* Row-cyclic Jacobi SVD algorithm with column pivoting */ + +/* to drown denormals */ + if (l2pert) { +/* XSC = DSQRT(SMALL) */ + xsc = epsln / (doublereal) (*n); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + temp1 = xsc * (d__1 = a[q + q * a_dim1], abs(d__1)); + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + if (p > q && (d__1 = a[p + q * a_dim1], abs(d__1)) <= + temp1 || p < q) { + a[p + q * a_dim1] = d_sign(&temp1, &a[p + q * a_dim1]) + ; + } +/* L1949: */ + } +/* L1947: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &a[(a_dim1 << 1) + 1], + lda); + } + +/* triangular matrix (plus perturbation which is ignored in */ +/* the part which destroys triangular form (confusing?!)) */ + + dgesvj_("L", "NoU", "NoV", &nr, &nr, &a[a_offset], lda, &sva[1], n, & + v[v_offset], ldv, &work[1], lwork, info); + + scalem = work[1]; + numrank = i_dnnt(&work[2]); + + + } else if (rsvec && ! lsvec) { + +/* -> Singular Values and Right Singular Vectors <- */ + + if (almort) { + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + dcopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], & + c__1); +/* L1998: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + + 1], ldv); + + dgesvj_("L", "U", "N", n, &nr, &v[v_offset], ldv, &sva[1], &nr, & + a[a_offset], lda, &work[1], lwork, info); + scalem = work[1]; + numrank = i_dnnt(&work[2]); + } else { + +/* accumulated product of Jacobi rotations, three are perfect ) */ + + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("Lower", &i__1, &i__2, &c_b34, &c_b34, &a[a_dim1 + 2], + lda); + i__1 = *lwork - *n; + dgelqf_(&nr, n, &a[a_offset], lda, &work[1], &work[*n + 1], &i__1, + &ierr); + dlacpy_("Lower", &nr, &nr, &a[a_offset], lda, &v[v_offset], ldv); + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + + 1], ldv); + i__1 = *lwork - (*n << 1); + dgeqrf_(&nr, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(*n << + 1) + 1], &i__1, &ierr); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p + 1; + dcopy_(&i__2, &v[p + p * v_dim1], ldv, &v[p + p * v_dim1], & + c__1); +/* L8998: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + + 1], ldv); + + dgesvj_("Lower", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[1], & + nr, &u[u_offset], ldu, &work[*n + 1], lwork, info); + scalem = work[*n + 1]; + numrank = i_dnnt(&work[*n + 2]); + if (nr < *n) { + i__1 = *n - nr; + dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + v_dim1], + ldv); + i__1 = *n - nr; + dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 + + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + + 1) * v_dim1], ldv); + } + + i__1 = *lwork - *n; + dormlq_("Left", "Transpose", n, n, &nr, &a[a_offset], lda, &work[ + 1], &v[v_offset], ldv, &work[*n + 1], &i__1, &ierr); + + } + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + dcopy_(n, &v[p + v_dim1], ldv, &a[iwork[p] + a_dim1], lda); +/* L8991: */ + } + dlacpy_("All", n, n, &a[a_offset], lda, &v[v_offset], ldv); + + if (transp) { + dlacpy_("All", n, n, &v[v_offset], ldv, &u[u_offset], ldu); + } + + } else if (lsvec && ! rsvec) { + + +/* Jacobi rotations in the Jacobi iterations. */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + dcopy_(&i__2, &a[p + p * a_dim1], lda, &u[p + p * u_dim1], &c__1); +/* L1965: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 1], + ldu); + + i__1 = *lwork - (*n << 1); + dgeqrf_(n, &nr, &u[u_offset], ldu, &work[*n + 1], &work[(*n << 1) + 1] + , &i__1, &ierr); + + i__1 = nr - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p; + dcopy_(&i__2, &u[p + (p + 1) * u_dim1], ldu, &u[p + 1 + p * + u_dim1], &c__1); +/* L1967: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 1], + ldu); + + i__1 = *lwork - *n; + dgesvj_("Lower", "U", "N", &nr, &nr, &u[u_offset], ldu, &sva[1], &nr, + &a[a_offset], lda, &work[*n + 1], &i__1, info); + scalem = work[*n + 1]; + numrank = i_dnnt(&work[*n + 2]); + + if (nr < *m) { + i__1 = *m - nr; + dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) * u_dim1 + + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + (nr + + 1) * u_dim1], ldu); + } + } + + i__1 = *lwork - *n; + dormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[1], &u[ + u_offset], ldu, &work[*n + 1], &i__1, &ierr); + + if (rowpiv) { + i__1 = *m - 1; + dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n << 1) + + 1], &c_n1); + } + + i__1 = n1; + for (p = 1; p <= i__1; ++p) { + xsc = 1. / dnrm2_(m, &u[p * u_dim1 + 1], &c__1); + dscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1); +/* L1974: */ + } + + if (transp) { + dlacpy_("All", n, n, &u[u_offset], ldu, &v[v_offset], ldv); + } + + } else { + + + if (! jracc) { + + if (! almort) { + +/* Second Preconditioning Step (QRF [with pivoting]) */ +/* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is */ +/* equivalent to an LQF CALL. Since in many libraries the QRF */ +/* seems to be better optimized than the LQF, we do explicit */ +/* transpose and use the QRF. This is subject to changes in an */ +/* optimized implementation of DGEJSV. */ + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + dcopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], + &c__1); +/* L1968: */ + } + +/* denormals in the second QR factorization, where they are */ +/* as good as zeros. This is done to avoid painfully slow */ +/* computation with denormals. The relative size of the perturbation */ +/* is a parameter that can be changed by the implementer. */ +/* This perturbation device will be obsolete on machines with */ +/* properly implemented arithmetic. */ +/* To switch it off, set L2PERT=.FALSE. To remove it from the */ +/* code, remove the action under L2PERT=.TRUE., leave the ELSE part. */ +/* The following two loops should be blocked and fused with the */ +/* transposed copy above. */ + + if (l2pert) { + xsc = sqrt(small); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + temp1 = xsc * (d__1 = v[q + q * v_dim1], abs(d__1)); + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + if (p > q && (d__1 = v[p + q * v_dim1], abs(d__1)) + <= temp1 || p < q) { + v[p + q * v_dim1] = d_sign(&temp1, &v[p + q * + v_dim1]); + } + if (p < q) { + v[p + q * v_dim1] = -v[p + q * v_dim1]; + } +/* L2968: */ + } +/* L2969: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << + 1) + 1], ldv); + } + +/* Estimate the row scaled condition number of R1 */ +/* (If R1 is rectangular, N > NR, then the condition number */ +/* of the leading NR x NR submatrix is estimated.) */ + + dlacpy_("L", &nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1] + , &nr); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p + 1; + temp1 = dnrm2_(&i__2, &work[(*n << 1) + (p - 1) * nr + p], + &c__1); + i__2 = nr - p + 1; + d__1 = 1. / temp1; + dscal_(&i__2, &d__1, &work[(*n << 1) + (p - 1) * nr + p], + &c__1); +/* L3950: */ + } + dpocon_("Lower", &nr, &work[(*n << 1) + 1], &nr, &c_b35, & + temp1, &work[(*n << 1) + nr * nr + 1], &iwork[*m + (* + n << 1) + 1], &ierr); + condr1 = 1. / sqrt(temp1); +/* R1 is OK for inverse <=> CONDR1 .LT. DBLE(N) */ +/* more conservative <=> CONDR1 .LT. DSQRT(DBLE(N)) */ + + cond_ok__ = sqrt((doublereal) nr); +/* [TP] COND_OK is a tuning parameter. */ + if (condr1 < cond_ok__) { +/* implementation, this QRF should be implemented as the QRF */ +/* of a lower triangular matrix. */ +/* R1^t = Q2 * R2 */ + i__1 = *lwork - (*n << 1); + dgeqrf_(n, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(* + n << 1) + 1], &i__1, &ierr); + + if (l2pert) { + xsc = sqrt(small) / epsln; + i__1 = nr; + for (p = 2; p <= i__1; ++p) { + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { +/* Computing MIN */ + d__3 = (d__1 = v[p + p * v_dim1], abs(d__1)), + d__4 = (d__2 = v[q + q * v_dim1], abs( + d__2)); + temp1 = xsc * f2cmin(d__3,d__4); + if ((d__1 = v[q + p * v_dim1], abs(d__1)) <= + temp1) { + v[q + p * v_dim1] = d_sign(&temp1, &v[q + + p * v_dim1]); + } +/* L3958: */ + } +/* L3959: */ + } + } + + if (nr != *n) { + dlacpy_("A", n, &nr, &v[v_offset], ldv, &work[(*n << + 1) + 1], n); + } + + i__1 = nr - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p; + dcopy_(&i__2, &v[p + (p + 1) * v_dim1], ldv, &v[p + 1 + + p * v_dim1], &c__1); +/* L1969: */ + } + + condr2 = condr1; + + } else { + +/* Note that windowed pivoting would be equally good */ +/* numerically, and more run-time efficient. So, in */ +/* an optimal implementation, the next call to DGEQP3 */ +/* should be replaced with eg. CALL SGEQPX (ACM TOMS #782) */ +/* with properly (carefully) chosen parameters. */ + +/* R1^t * P2 = Q2 * R2 */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + iwork[*n + p] = 0; +/* L3003: */ + } + i__1 = *lwork - (*n << 1); + dgeqp3_(n, &nr, &v[v_offset], ldv, &iwork[*n + 1], &work[* + n + 1], &work[(*n << 1) + 1], &i__1, &ierr); +/* * CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1), */ +/* * $ LWORK-2*N, IERR ) */ + if (l2pert) { + xsc = sqrt(small); + i__1 = nr; + for (p = 2; p <= i__1; ++p) { + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { +/* Computing MIN */ + d__3 = (d__1 = v[p + p * v_dim1], abs(d__1)), + d__4 = (d__2 = v[q + q * v_dim1], abs( + d__2)); + temp1 = xsc * f2cmin(d__3,d__4); + if ((d__1 = v[q + p * v_dim1], abs(d__1)) <= + temp1) { + v[q + p * v_dim1] = d_sign(&temp1, &v[q + + p * v_dim1]); + } +/* L3968: */ + } +/* L3969: */ + } + } + + dlacpy_("A", n, &nr, &v[v_offset], ldv, &work[(*n << 1) + + 1], n); + + if (l2pert) { + xsc = sqrt(small); + i__1 = nr; + for (p = 2; p <= i__1; ++p) { + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { +/* Computing MIN */ + d__3 = (d__1 = v[p + p * v_dim1], abs(d__1)), + d__4 = (d__2 = v[q + q * v_dim1], abs( + d__2)); + temp1 = xsc * f2cmin(d__3,d__4); + v[p + q * v_dim1] = -d_sign(&temp1, &v[q + p * + v_dim1]); +/* L8971: */ + } +/* L8970: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("L", &i__1, &i__2, &c_b34, &c_b34, &v[v_dim1 + + 2], ldv); + } +/* Now, compute R2 = L3 * Q3, the LQ factorization. */ + i__1 = *lwork - (*n << 1) - *n * nr - nr; + dgelqf_(&nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + *n + * nr + 1], &work[(*n << 1) + *n * nr + nr + 1], & + i__1, &ierr); + dlacpy_("L", &nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + + *n * nr + nr + 1], &nr); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + temp1 = dnrm2_(&p, &work[(*n << 1) + *n * nr + nr + p] + , &nr); + d__1 = 1. / temp1; + dscal_(&p, &d__1, &work[(*n << 1) + *n * nr + nr + p], + &nr); +/* L4950: */ + } + dpocon_("L", &nr, &work[(*n << 1) + *n * nr + nr + 1], & + nr, &c_b35, &temp1, &work[(*n << 1) + *n * nr + + nr + nr * nr + 1], &iwork[*m + (*n << 1) + 1], & + ierr); + condr2 = 1. / sqrt(temp1); + + if (condr2 >= cond_ok__) { +/* (this overwrites the copy of R2, as it will not be */ +/* needed in this branch, but it does not overwritte the */ +/* Huseholder vectors of Q2.). */ + dlacpy_("U", &nr, &nr, &v[v_offset], ldv, &work[(*n << + 1) + 1], n); +/* WORK(2*N+N*NR+1:2*N+N*NR+N) */ + } + + } + + if (l2pert) { + xsc = sqrt(small); + i__1 = nr; + for (q = 2; q <= i__1; ++q) { + temp1 = xsc * v[q + q * v_dim1]; + i__2 = q - 1; + for (p = 1; p <= i__2; ++p) { +/* V(p,q) = - DSIGN( TEMP1, V(q,p) ) */ + v[p + q * v_dim1] = -d_sign(&temp1, &v[p + q * + v_dim1]); +/* L4969: */ + } +/* L4968: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << + 1) + 1], ldv); + } + +/* Second preconditioning finished; continue with Jacobi SVD */ +/* The input matrix is lower trinagular. */ + +/* Recover the right singular vectors as solution of a well */ +/* conditioned triangular matrix equation. */ + + if (condr1 < cond_ok__) { + + i__1 = *lwork - (*n << 1) - *n * nr - nr; + dgesvj_("L", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[ + 1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n * + nr + nr + 1], &i__1, info); + scalem = work[(*n << 1) + *n * nr + nr + 1]; + numrank = i_dnnt(&work[(*n << 1) + *n * nr + nr + 2]); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + dcopy_(&nr, &v[p * v_dim1 + 1], &c__1, &u[p * u_dim1 + + 1], &c__1); + dscal_(&nr, &sva[p], &v[p * v_dim1 + 1], &c__1); +/* L3970: */ + } + + if (nr == *n) { +/* :)) .. best case, R1 is inverted. The solution of this matrix */ +/* equation is Q2*V2 = the product of the Jacobi rotations */ +/* used in DGESVJ, premultiplied with the orthogonal matrix */ +/* from the second QR factorization. */ + dtrsm_("L", "U", "N", "N", &nr, &nr, &c_b35, &a[ + a_offset], lda, &v[v_offset], ldv); + } else { +/* is inverted to get the product of the Jacobi rotations */ +/* used in DGESVJ. The Q-factor from the second QR */ +/* factorization is then built in explicitly. */ + dtrsm_("L", "U", "T", "N", &nr, &nr, &c_b35, &work[(* + n << 1) + 1], n, &v[v_offset], ldv); + if (nr < *n) { + i__1 = *n - nr; + dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + + 1 + v_dim1], ldv); + i__1 = *n - nr; + dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + + 1) * v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + + 1 + (nr + 1) * v_dim1], ldv); + } + i__1 = *lwork - (*n << 1) - *n * nr - nr; + dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, + &work[*n + 1], &v[v_offset], ldv, &work[(*n << + 1) + *n * nr + nr + 1], &i__1, &ierr); + } + + } else if (condr2 < cond_ok__) { + +/* :) .. the input matrix A is very likely a relative of */ +/* the Kahan matrix :) */ +/* The matrix R2 is inverted. The solution of the matrix equation */ +/* is Q3^T*V3 = the product of the Jacobi rotations (appplied to */ +/* the lower triangular L3 from the LQ factorization of */ +/* R2=L3*Q3), pre-multiplied with the transposed Q3. */ + i__1 = *lwork - (*n << 1) - *n * nr - nr; + dgesvj_("L", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[ + 1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n * + nr + nr + 1], &i__1, info); + scalem = work[(*n << 1) + *n * nr + nr + 1]; + numrank = i_dnnt(&work[(*n << 1) + *n * nr + nr + 2]); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + dcopy_(&nr, &v[p * v_dim1 + 1], &c__1, &u[p * u_dim1 + + 1], &c__1); + dscal_(&nr, &sva[p], &u[p * u_dim1 + 1], &c__1); +/* L3870: */ + } + dtrsm_("L", "U", "N", "N", &nr, &nr, &c_b35, &work[(*n << + 1) + 1], n, &u[u_offset], ldu); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + work[(*n << 1) + *n * nr + nr + iwork[*n + p]] = + u[p + q * u_dim1]; +/* L872: */ + } + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + u[p + q * u_dim1] = work[(*n << 1) + *n * nr + nr + + p]; +/* L874: */ + } +/* L873: */ + } + if (nr < *n) { + i__1 = *n - nr; + dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + + v_dim1], ldv); + i__1 = *n - nr; + dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * + v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + + (nr + 1) * v_dim1], ldv); + } + i__1 = *lwork - (*n << 1) - *n * nr - nr; + dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, & + work[*n + 1], &v[v_offset], ldv, &work[(*n << 1) + + *n * nr + nr + 1], &i__1, &ierr); + } else { +/* Last line of defense. */ +/* #:( This is a rather pathological case: no scaled condition */ +/* improvement after two pivoted QR factorizations. Other */ +/* possibility is that the rank revealing QR factorization */ +/* or the condition estimator has failed, or the COND_OK */ +/* is set very close to ONE (which is unnecessary). Normally, */ +/* this branch should never be executed, but in rare cases of */ +/* failure of the RRQR or condition estimator, the last line of */ +/* defense ensures that DGEJSV completes the task. */ +/* Compute the full SVD of L3 using DGESVJ with explicit */ +/* accumulation of Jacobi rotations. */ + i__1 = *lwork - (*n << 1) - *n * nr - nr; + dgesvj_("L", "U", "V", &nr, &nr, &v[v_offset], ldv, &sva[ + 1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n * + nr + nr + 1], &i__1, info); + scalem = work[(*n << 1) + *n * nr + nr + 1]; + numrank = i_dnnt(&work[(*n << 1) + *n * nr + nr + 2]); + if (nr < *n) { + i__1 = *n - nr; + dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + + v_dim1], ldv); + i__1 = *n - nr; + dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * + v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + + (nr + 1) * v_dim1], ldv); + } + i__1 = *lwork - (*n << 1) - *n * nr - nr; + dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, & + work[*n + 1], &v[v_offset], ldv, &work[(*n << 1) + + *n * nr + nr + 1], &i__1, &ierr); + + i__1 = *lwork - (*n << 1) - *n * nr - nr; + dormlq_("L", "T", &nr, &nr, &nr, &work[(*n << 1) + 1], n, + &work[(*n << 1) + *n * nr + 1], &u[u_offset], ldu, + &work[(*n << 1) + *n * nr + nr + 1], &i__1, & + ierr); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + work[(*n << 1) + *n * nr + nr + iwork[*n + p]] = + u[p + q * u_dim1]; +/* L772: */ + } + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + u[p + q * u_dim1] = work[(*n << 1) + *n * nr + nr + + p]; +/* L774: */ + } +/* L773: */ + } + + } + +/* Permute the rows of V using the (column) permutation from the */ +/* first QRF. Also, scale the columns to make them unit in */ +/* Euclidean norm. This applies to all cases. */ + + temp1 = sqrt((doublereal) (*n)) * epsln; + i__1 = *n; + for (q = 1; q <= i__1; ++q) { + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + work[(*n << 1) + *n * nr + nr + iwork[p]] = v[p + q * + v_dim1]; +/* L972: */ + } + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + v[p + q * v_dim1] = work[(*n << 1) + *n * nr + nr + p] + ; +/* L973: */ + } + xsc = 1. / dnrm2_(n, &v[q * v_dim1 + 1], &c__1); + if (xsc < 1. - temp1 || xsc > temp1 + 1.) { + dscal_(n, &xsc, &v[q * v_dim1 + 1], &c__1); + } +/* L1972: */ + } +/* At this moment, V contains the right singular vectors of A. */ +/* Next, assemble the left singular vector matrix U (M x N). */ + if (nr < *m) { + i__1 = *m - nr; + dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) * + u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + + (nr + 1) * u_dim1], ldu); + } + } + +/* The Q matrix from the first QRF is built into the left singular */ +/* matrix U. This applies to all cases. */ + + i__1 = *lwork - *n; + dormqr_("Left", "No_Tr", m, &n1, n, &a[a_offset], lda, &work[ + 1], &u[u_offset], ldu, &work[*n + 1], &i__1, &ierr); +/* The columns of U are normalized. The cost is O(M*N) flops. */ + temp1 = sqrt((doublereal) (*m)) * epsln; + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + xsc = 1. / dnrm2_(m, &u[p * u_dim1 + 1], &c__1); + if (xsc < 1. - temp1 || xsc > temp1 + 1.) { + dscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1); + } +/* L1973: */ + } + +/* If the initial QRF is computed with row pivoting, the left */ +/* singular vectors must be adjusted. */ + + if (rowpiv) { + i__1 = *m - 1; + dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n + << 1) + 1], &c_n1); + } + + } else { + +/* the second QRF is not needed */ + + dlacpy_("Upper", n, n, &a[a_offset], lda, &work[*n + 1], n); + if (l2pert) { + xsc = sqrt(small); + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + temp1 = xsc * work[*n + (p - 1) * *n + p]; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + work[*n + (q - 1) * *n + p] = -d_sign(&temp1, & + work[*n + (p - 1) * *n + q]); +/* L5971: */ + } +/* L5970: */ + } + } else { + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_("Lower", &i__1, &i__2, &c_b34, &c_b34, &work[*n + + 2], n); + } + + i__1 = *lwork - *n - *n * *n; + dgesvj_("Upper", "U", "N", n, n, &work[*n + 1], n, &sva[1], n, + &u[u_offset], ldu, &work[*n + *n * *n + 1], &i__1, + info); + + scalem = work[*n + *n * *n + 1]; + numrank = i_dnnt(&work[*n + *n * *n + 2]); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + dcopy_(n, &work[*n + (p - 1) * *n + 1], &c__1, &u[p * + u_dim1 + 1], &c__1); + dscal_(n, &sva[p], &work[*n + (p - 1) * *n + 1], &c__1); +/* L6970: */ + } + + dtrsm_("Left", "Upper", "NoTrans", "No UD", n, n, &c_b35, &a[ + a_offset], lda, &work[*n + 1], n); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + dcopy_(n, &work[*n + p], n, &v[iwork[p] + v_dim1], ldv); +/* L6972: */ + } + temp1 = sqrt((doublereal) (*n)) * epsln; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + xsc = 1. / dnrm2_(n, &v[p * v_dim1 + 1], &c__1); + if (xsc < 1. - temp1 || xsc > temp1 + 1.) { + dscal_(n, &xsc, &v[p * v_dim1 + 1], &c__1); + } +/* L6971: */ + } + +/* Assemble the left singular vector matrix U (M x N). */ + + if (*n < *m) { + i__1 = *m - *n; + dlaset_("A", &i__1, n, &c_b34, &c_b34, &u[*n + 1 + u_dim1] + , ldu); + if (*n < n1) { + i__1 = n1 - *n; + dlaset_("A", n, &i__1, &c_b34, &c_b34, &u[(*n + 1) * + u_dim1 + 1], ldu); + i__1 = *m - *n; + i__2 = n1 - *n; + dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[*n + 1 + + (*n + 1) * u_dim1], ldu); + } + } + i__1 = *lwork - *n; + dormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[ + 1], &u[u_offset], ldu, &work[*n + 1], &i__1, &ierr); + temp1 = sqrt((doublereal) (*m)) * epsln; + i__1 = n1; + for (p = 1; p <= i__1; ++p) { + xsc = 1. / dnrm2_(m, &u[p * u_dim1 + 1], &c__1); + if (xsc < 1. - temp1 || xsc > temp1 + 1.) { + dscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1); + } +/* L6973: */ + } + + if (rowpiv) { + i__1 = *m - 1; + dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n + << 1) + 1], &c_n1); + } + + } + +/* end of the >> almost orthogonal case << in the full SVD */ + + } else { + +/* This branch deploys a preconditioned Jacobi SVD with explicitly */ +/* accumulated rotations. It is included as optional, mainly for */ +/* experimental purposes. It does perform well, and can also be used. */ +/* In this implementation, this branch will be automatically activated */ +/* if the condition number sigma_max(A) / sigma_min(A) is predicted */ +/* to be greater than the overflow threshold. This is because the */ +/* a posteriori computation of the singular vectors assumes robust */ +/* implementation of BLAS and some LAPACK procedures, capable of working */ +/* in presence of extreme values. Since that is not always the case, ... */ + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + dcopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], & + c__1); +/* L7968: */ + } + + if (l2pert) { + xsc = sqrt(small / epsln); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + temp1 = xsc * (d__1 = v[q + q * v_dim1], abs(d__1)); + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + if (p > q && (d__1 = v[p + q * v_dim1], abs(d__1)) <= + temp1 || p < q) { + v[p + q * v_dim1] = d_sign(&temp1, &v[p + q * + v_dim1]); + } + if (p < q) { + v[p + q * v_dim1] = -v[p + q * v_dim1]; + } +/* L5968: */ + } +/* L5969: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + + 1], ldv); + } + i__1 = *lwork - (*n << 1); + dgeqrf_(n, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(*n << 1) + + 1], &i__1, &ierr); + dlacpy_("L", n, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1], n); + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p + 1; + dcopy_(&i__2, &v[p + p * v_dim1], ldv, &u[p + p * u_dim1], & + c__1); +/* L7969: */ + } + if (l2pert) { + xsc = sqrt(small / epsln); + i__1 = nr; + for (q = 2; q <= i__1; ++q) { + i__2 = q - 1; + for (p = 1; p <= i__2; ++p) { +/* Computing MIN */ + d__3 = (d__1 = u[p + p * u_dim1], abs(d__1)), d__4 = ( + d__2 = u[q + q * u_dim1], abs(d__2)); + temp1 = xsc * f2cmin(d__3,d__4); + u[p + q * u_dim1] = -d_sign(&temp1, &u[q + p * u_dim1] + ); +/* L9971: */ + } +/* L9970: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + + 1], ldu); + } + i__1 = *lwork - (*n << 1) - *n * nr; + dgesvj_("G", "U", "V", &nr, &nr, &u[u_offset], ldu, &sva[1], n, & + v[v_offset], ldv, &work[(*n << 1) + *n * nr + 1], &i__1, + info); + scalem = work[(*n << 1) + *n * nr + 1]; + numrank = i_dnnt(&work[(*n << 1) + *n * nr + 2]); + if (nr < *n) { + i__1 = *n - nr; + dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + v_dim1], + ldv); + i__1 = *n - nr; + dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 + + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + + 1) * v_dim1], ldv); + } + i__1 = *lwork - (*n << 1) - *n * nr - nr; + dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, &work[*n + + 1], &v[v_offset], ldv, &work[(*n << 1) + *n * nr + nr + 1] + , &i__1, &ierr); + +/* Permute the rows of V using the (column) permutation from the */ +/* first QRF. Also, scale the columns to make them unit in */ +/* Euclidean norm. This applies to all cases. */ + + temp1 = sqrt((doublereal) (*n)) * epsln; + i__1 = *n; + for (q = 1; q <= i__1; ++q) { + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + work[(*n << 1) + *n * nr + nr + iwork[p]] = v[p + q * + v_dim1]; +/* L8972: */ + } + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + v[p + q * v_dim1] = work[(*n << 1) + *n * nr + nr + p]; +/* L8973: */ + } + xsc = 1. / dnrm2_(n, &v[q * v_dim1 + 1], &c__1); + if (xsc < 1. - temp1 || xsc > temp1 + 1.) { + dscal_(n, &xsc, &v[q * v_dim1 + 1], &c__1); + } +/* L7972: */ + } + +/* At this moment, V contains the right singular vectors of A. */ +/* Next, assemble the left singular vector matrix U (M x N). */ + + if (nr < *m) { + i__1 = *m - nr; + dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + u_dim1], + ldu); + if (nr < n1) { + i__1 = n1 - nr; + dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) * + u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + ( + nr + 1) * u_dim1], ldu); + } + } + + i__1 = *lwork - *n; + dormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[1], & + u[u_offset], ldu, &work[*n + 1], &i__1, &ierr); + + if (rowpiv) { + i__1 = *m - 1; + dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n << 1) + + 1], &c_n1); + } + + + } + if (transp) { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + dswap_(n, &u[p * u_dim1 + 1], &c__1, &v[p * v_dim1 + 1], & + c__1); +/* L6974: */ + } + } + + } +/* end of the full SVD */ + +/* Undo scaling, if necessary (and possible) */ + + if (uscal2 <= big / sva[1] * uscal1) { + dlascl_("G", &c__0, &c__0, &uscal1, &uscal2, &nr, &c__1, &sva[1], n, & + ierr); + uscal1 = 1.; + uscal2 = 1.; + } + + if (nr < *n) { + i__1 = *n; + for (p = nr + 1; p <= i__1; ++p) { + sva[p] = 0.; +/* L3004: */ + } + } + + work[1] = uscal2 * scalem; + work[2] = uscal1; + if (errest) { + work[3] = sconda; + } + if (lsvec && rsvec) { + work[4] = condr1; + work[5] = condr2; + } + if (l2tran) { + work[6] = entra; + work[7] = entrat; + } + + iwork[1] = nr; + iwork[2] = numrank; + iwork[3] = warning; + + return 0; +} /* dgejsv_ */ + diff --git a/lapack-netlib/SRC/dgelq.c b/lapack-netlib/SRC/dgelq.c new file mode 100644 index 000000000..9d2e7860d --- /dev/null +++ b/lapack-netlib/SRC/dgelq.c @@ -0,0 +1,745 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGELQ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, M, N, TSIZE, LWORK */ +/* DOUBLE PRECISION A( LDA, * ), T( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELQ computes an LQ factorization of a real M-by-N matrix A: */ +/* > */ +/* > A = ( L 0 ) * Q */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a N-by-N orthogonal matrix; */ +/* > L is a lower-triangular M-by-M matrix; */ +/* > 0 is a M-by-(N-M) zero matrix, if M < N. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and below the diagonal of the array */ +/* > contain the M-by-f2cmin(M,N) lower trapezoidal matrix L */ +/* > (L is lower triangular if M <= N); */ +/* > the elements above the diagonal are used to store part of the */ +/* > data structure to represent Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)) */ +/* > On exit, if INFO = 0, T(1) returns optimal (or either minimal */ +/* > or optimal, if query is assumed) TSIZE. See TSIZE for details. */ +/* > Remaining T contains part of the data structure used to represent Q. */ +/* > If one wants to apply or construct Q, then one needs to keep T */ +/* > (in addition to A) and pass it to further subroutines. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TSIZE */ +/* > \verbatim */ +/* > TSIZE is INTEGER */ +/* > If TSIZE >= 5, the dimension of the array T. */ +/* > If TSIZE = -1 or -2, then a workspace query is assumed. The routine */ +/* > only calculates the sizes of the T and WORK arrays, returns these */ +/* > values as the first entries of the T and WORK arrays, and no error */ +/* > message related to T or WORK is issued by XERBLA. */ +/* > If TSIZE = -1, the routine calculates optimal size of T for the */ +/* > optimum performance and returns this value in T(1). */ +/* > If TSIZE = -2, the routine calculates minimal size of T and */ +/* > returns this value in T(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */ +/* > or optimal, if query was assumed) LWORK. */ +/* > See LWORK for details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If LWORK = -1 or -2, then a workspace query is assumed. The routine */ +/* > only calculates the sizes of the T and WORK arrays, returns these */ +/* > values as the first entries of the T and WORK arrays, and no error */ +/* > message related to T or WORK is issued by XERBLA. */ +/* > If LWORK = -1, the routine calculates optimal size of WORK for the */ +/* > optimal performance and returns this value in WORK(1). */ +/* > If LWORK = -2, the routine calculates minimal size of WORK and */ +/* > returns this value in WORK(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \par Further Details */ +/* ==================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The goal of the interface is to give maximum freedom to the developers for */ +/* > creating any LQ factorization algorithm they wish. The triangular */ +/* > (trapezoidal) L has to be stored in the lower part of A. The lower part of A */ +/* > and the array T can be used to store any relevant information for applying or */ +/* > constructing the Q factor. The WORK array can safely be discarded after exit. */ +/* > */ +/* > Caution: One should not expect the sizes of T and WORK to be the same from one */ +/* > LAPACK implementation to the other, or even from one execution to the other. */ +/* > A workspace query (for T and WORK) is needed at each execution. However, */ +/* > for a given execution, the size of T and WORK are fixed and will not change */ +/* > from one query to the next. */ +/* > */ +/* > \endverbatim */ +/* > */ +/* > \par Further Details particular to this LAPACK implementation: */ +/* ============================================================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > These details are particular for this LAPACK implementation. Users should not */ +/* > take them for granted. These details may change in the future, and are not likely */ +/* > true for another LAPACK implementation. These details are relevant if one wants */ +/* > to try to understand the code. They are not part of the interface. */ +/* > */ +/* > In this version, */ +/* > */ +/* > T(2): row block size (MB) */ +/* > T(3): column block size (NB) */ +/* > T(6:TSIZE): data structure needed for Q, computed by */ +/* > DLASWLQ or DGELQT */ +/* > */ +/* > Depending on the matrix dimensions M and N, and row and column */ +/* > block sizes MB and NB returned by ILAENV, DGELQ will use either */ +/* > DLASWLQ (if the matrix is short-and-wide) or DGELQT to compute */ +/* > the LQ factorization. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgelq_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *t, integer *tsize, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + logical mint, minw; + integer lwmin, lwreq, lwopt, mb, nb, nblcks; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dgelqt_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + logical lminws, lquery; + integer mintsz; + extern /* Subroutine */ int dlaswlq_(integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --t; + --work; + + /* Function Body */ + *info = 0; + + lquery = *tsize == -1 || *tsize == -2 || *lwork == -1 || *lwork == -2; + + mint = FALSE_; + minw = FALSE_; + if (*tsize == -2 || *lwork == -2) { + if (*tsize != -1) { + mint = TRUE_; + } + if (*lwork != -1) { + minw = TRUE_; + } + } + +/* Determine the block size */ + + if (f2cmin(*m,*n) > 0) { + mb = ilaenv_(&c__1, "DGELQ ", " ", m, n, &c__1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb = ilaenv_(&c__1, "DGELQ ", " ", m, n, &c__2, &c_n1, (ftnlen)6, ( + ftnlen)1); + } else { + mb = 1; + nb = *n; + } + if (mb > f2cmin(*m,*n) || mb < 1) { + mb = 1; + } + if (nb > *n || nb <= *m) { + nb = *n; + } + mintsz = *m + 5; + if (nb > *m && *n > *m) { + if ((*n - *m) % (nb - *m) == 0) { + nblcks = (*n - *m) / (nb - *m); + } else { + nblcks = (*n - *m) / (nb - *m) + 1; + } + } else { + nblcks = 1; + } + +/* Determine if the workspace size satisfies minimal size */ + + if (*n <= *m || nb <= *m || nb >= *n) { + lwmin = f2cmax(1,*n); +/* Computing MAX */ + i__1 = 1, i__2 = mb * *n; + lwopt = f2cmax(i__1,i__2); + } else { + lwmin = f2cmax(1,*m); +/* Computing MAX */ + i__1 = 1, i__2 = mb * *m; + lwopt = f2cmax(i__1,i__2); + } + lminws = FALSE_; +/* Computing MAX */ + i__1 = 1, i__2 = mb * *m * nblcks + 5; + if ((*tsize < f2cmax(i__1,i__2) || *lwork < lwopt) && *lwork >= lwmin && * + tsize >= mintsz && ! lquery) { +/* Computing MAX */ + i__1 = 1, i__2 = mb * *m * nblcks + 5; + if (*tsize < f2cmax(i__1,i__2)) { + lminws = TRUE_; + mb = 1; + nb = *n; + } + if (*lwork < lwopt) { + lminws = TRUE_; + mb = 1; + } + } + if (*n <= *m || nb <= *m || nb >= *n) { +/* Computing MAX */ + i__1 = 1, i__2 = mb * *n; + lwreq = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = mb * *m; + lwreq = f2cmax(i__1,i__2); + } + + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = mb * *m * nblcks + 5; + if (*tsize < f2cmax(i__1,i__2) && ! lquery && ! lminws) { + *info = -6; + } else if (*lwork < lwreq && ! lquery && ! lminws) { + *info = -8; + } + } + + if (*info == 0) { + if (mint) { + t[1] = (doublereal) mintsz; + } else { + t[1] = (doublereal) (mb * *m * nblcks + 5); + } + t[2] = (doublereal) mb; + t[3] = (doublereal) nb; + if (minw) { + work[1] = (doublereal) lwmin; + } else { + work[1] = (doublereal) lwreq; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELQ", &i__1, (ftnlen)5); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (f2cmin(*m,*n) == 0) { + return 0; + } + +/* The LQ Decomposition */ + + if (*n <= *m || nb <= *m || nb >= *n) { + dgelqt_(m, n, &mb, &a[a_offset], lda, &t[6], &mb, &work[1], info); + } else { + dlaswlq_(m, n, &mb, &nb, &a[a_offset], lda, &t[6], &mb, &work[1], + lwork, info); + } + + work[1] = (doublereal) lwreq; + + return 0; + +/* End of DGELQ */ + +} /* dgelq_ */ + diff --git a/lapack-netlib/SRC/dgelq2.c b/lapack-netlib/SRC/dgelq2.c new file mode 100644 index 000000000..19737b2ed --- /dev/null +++ b/lapack-netlib/SRC/dgelq2.c @@ -0,0 +1,597 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked algorit +hm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGELQ2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELQ2 computes an LQ factorization of a real m-by-n matrix A: */ +/* > */ +/* > A = ( L 0 ) * Q */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a n-by-n orthogonal matrix; */ +/* > L is an lower-triangular m-by-m matrix; */ +/* > 0 is a m-by-(n-m) zero matrix, if m < n. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the m by n matrix A. */ +/* > On exit, the elements on and below the diagonal of the array */ +/* > contain the m by f2cmin(m,n) lower trapezoidal matrix L (L is */ +/* > lower triangular if m <= n); the elements above the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of elementary reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2019 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */ +/* > and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgelq2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, k; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen); + doublereal aii; + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELQ2", &i__1, (ftnlen)6); + return 0; + } + + k = f2cmin(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + f2cmin(i__3,*n) * a_dim1] + , lda, &tau[i__]); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__; + i__3 = *n - i__ + 1; + dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of DGELQ2 */ + +} /* dgelq2_ */ + diff --git a/lapack-netlib/SRC/dgelqf.c b/lapack-netlib/SRC/dgelqf.c new file mode 100644 index 000000000..00e2f1f19 --- /dev/null +++ b/lapack-netlib/SRC/dgelqf.c @@ -0,0 +1,700 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGELQF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGELQF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELQF computes an LQ factorization of a real M-by-N matrix A: */ +/* > */ +/* > A = ( L 0 ) * Q */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a N-by-N orthogonal matrix; */ +/* > L is an lower-triangular M-by-M matrix; */ +/* > 0 is a M-by-(N-M) zero matrix, if M < N. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and below the diagonal of the array */ +/* > contain the m-by-f2cmin(m,n) lower trapezoidal matrix L (L is */ +/* > lower triangular if m <= n); the elements above the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of elementary reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,M). */ +/* > For optimum performance LWORK >= M*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2019 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */ +/* > and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, nbmin, iinfo; + extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer ib, nb; + extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + integer nx; + extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + integer iws; + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *m * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*lwork < f2cmax(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELQF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = f2cmin(*m,*n); + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,nb); + +/* Compute the LQ factorization of the current block */ +/* A(i:i+ib-1,i:n) */ + + i__3 = *n - i__ + 1; + dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__3 = *n - i__ + 1; + dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i+ib:m,i:n) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGELQF */ + +} /* dgelqf_ */ + diff --git a/lapack-netlib/SRC/dgelqt.c b/lapack-netlib/SRC/dgelqt.c new file mode 100644 index 000000000..03035edd8 --- /dev/null +++ b/lapack-netlib/SRC/dgelqt.c @@ -0,0 +1,621 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGELQT */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEQRT + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) */ + +/* INTEGER INFO, LDA, LDT, M, N, MB */ +/* DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELQT computes a blocked LQ factorization of a real M-by-N matrix A */ +/* > using the compact WY representation of Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MB */ +/* > \verbatim */ +/* > MB is INTEGER */ +/* > The block size to be used in the blocked QR. MIN(M,N) >= MB >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and below the diagonal of the array */ +/* > contain the M-by-MIN(M,N) lower trapezoidal matrix L (L is */ +/* > lower triangular if M <= N); the elements above the diagonal */ +/* > are the rows of V. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is DOUBLE PRECISION array, dimension (LDT,MIN(M,N)) */ +/* > The upper triangular block reflectors stored in compact form */ +/* > as a sequence of upper triangular blocks. See below */ +/* > for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= MB. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MB*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix V stores the elementary reflectors H(i) in the i-th row */ +/* > above the diagonal. For example, if M=5 and N=3, the matrix V is */ +/* > */ +/* > V = ( 1 v1 v1 v1 v1 ) */ +/* > ( 1 v2 v2 v2 ) */ +/* > ( 1 v3 v3 ) */ +/* > */ +/* > */ +/* > where the vi's represent the vectors which define H(i), which are returned */ +/* > in the matrix A. The 1's along the diagonal of V are not stored in A. */ +/* > Let K=MIN(M,N). The number of blocks is B = ceiling(K/MB), where each */ +/* > block is of order MB except for the last block, which is of order */ +/* > IB = K - (B-1)*MB. For each of the B blocks, a upper triangular block */ +/* > reflector factor is computed: T1, T2, ..., TB. The MB-by-MB (and IB-by-IB */ +/* > for the last block) T's are stored in the MB-by-K matrix T as */ +/* > */ +/* > T = (T1 T2 ... TB). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgelqt_(integer *m, integer *n, integer *mb, doublereal * + a, integer *lda, doublereal *t, integer *ldt, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + integer i__, k, iinfo, ib; + extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), xerbla_(char *, + integer *, ftnlen), dgelqt3_(integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*mb < 1 || *mb > f2cmin(*m,*n) && f2cmin(*m,*n) > 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldt < *mb) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELQT", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + k = f2cmin(*m,*n); + if (k == 0) { + return 0; + } + +/* Blocked loop of length K */ + + i__1 = k; + i__2 = *mb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,*mb); + +/* Compute the LQ factorization of the current block A(I:M,I:I+IB-1) */ + + i__3 = *n - i__ + 1; + dgelqt3_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &t[i__ * t_dim1 + 1] + , ldt, &iinfo); + if (i__ + ib <= *m) { + +/* Update by applying H**T to A(I:M,I+IB:N) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + i__5 = *m - i__ - ib + 1; + dlarfb_("R", "N", "F", "R", &i__3, &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &t[i__ * t_dim1 + 1], ldt, &a[i__ + ib + + i__ * a_dim1], lda, &work[1], &i__5); + } + } + return 0; + +/* End of DGELQT */ + +} /* dgelqt_ */ + diff --git a/lapack-netlib/SRC/dgelqt3.c b/lapack-netlib/SRC/dgelqt3.c new file mode 100644 index 000000000..ae8bc0b71 --- /dev/null +++ b/lapack-netlib/SRC/dgelqt3.c @@ -0,0 +1,679 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGELQT3 recursively computes a LQ factorization of a general real or complex matrix using the c +ompact WY representation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEQRT3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELQT3( M, N, A, LDA, T, LDT, INFO ) */ + +/* INTEGER INFO, LDA, M, N, LDT */ +/* DOUBLE PRECISION A( LDA, * ), T( LDT, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELQT3 recursively computes a LQ factorization of a real M-by-N */ +/* > matrix A, using the compact WY representation of Q. */ +/* > */ +/* > Based on the algorithm of Elmroth and Gustavson, */ +/* > IBM J. Res. Develop. Vol 44 No. 4 July 2000. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M =< N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the real M-by-N matrix A. On exit, the elements on and */ +/* > below the diagonal contain the N-by-N lower triangular matrix L; the */ +/* > elements above the diagonal are the rows of V. See below for */ +/* > further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is DOUBLE PRECISION array, dimension (LDT,N) */ +/* > The N-by-N upper triangular factor of the block reflector. */ +/* > The elements on and above the diagonal contain the block */ +/* > reflector T; the elements below the diagonal are not used. */ +/* > See below for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix V stores the elementary reflectors H(i) in the i-th row */ +/* > above the diagonal. For example, if M=5 and N=3, the matrix V is */ +/* > */ +/* > V = ( 1 v1 v1 v1 v1 ) */ +/* > ( 1 v2 v2 v2 ) */ +/* > ( 1 v3 v3 v3 ) */ +/* > */ +/* > */ +/* > where the vi's represent the vectors which define H(i), which are returned */ +/* > in the matrix A. The 1's along the diagonal of V are not stored in A. The */ +/* > block reflector H is then given by */ +/* > */ +/* > H = I - V * T * V**T */ +/* > */ +/* > where V**T is the transpose of V. */ +/* > */ +/* > For details of the algorithm, see Elmroth and Gustavson (cited above). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgelqt3_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *t, integer *ldt, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2; + + /* Local variables */ + integer i__, j; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer iinfo; + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + integer i1, j1, m1, m2; + extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*ldt < f2cmax(1,*m)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELQT3", &i__1, (ftnlen)7); + return 0; + } + + if (*m == 1) { + +/* Compute Householder transform when N=1 */ + + dlarfg_(n, &a[a_offset], &a[f2cmin(2,*n) * a_dim1 + 1], lda, &t[t_offset] + ); + + } else { + +/* Otherwise, split A into blocks... */ + + m1 = *m / 2; + m2 = *m - m1; +/* Computing MIN */ + i__1 = m1 + 1; + i1 = f2cmin(i__1,*m); +/* Computing MIN */ + i__1 = *m + 1; + j1 = f2cmin(i__1,*n); + +/* Compute A(1:M1,1:N) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H */ + + dgelqt3_(&m1, n, &a[a_offset], lda, &t[t_offset], ldt, &iinfo); + +/* Compute A(J1:M,1:N) = Q1^H A(J1:M,1:N) [workspace: T(1:N1,J1:N)] */ + + i__1 = m2; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = m1; + for (j = 1; j <= i__2; ++j) { + t[i__ + m1 + j * t_dim1] = a[i__ + m1 + j * a_dim1]; + } + } + dtrmm_("R", "U", "T", "U", &m2, &m1, &c_b7, &a[a_offset], lda, &t[i1 + + t_dim1], ldt); + + i__1 = *n - m1; + dgemm_("N", "T", &m2, &m1, &i__1, &c_b7, &a[i1 + i1 * a_dim1], lda, & + a[i1 * a_dim1 + 1], lda, &c_b7, &t[i1 + t_dim1], ldt); + + dtrmm_("R", "U", "N", "N", &m2, &m1, &c_b7, &t[t_offset], ldt, &t[i1 + + t_dim1], ldt); + + i__1 = *n - m1; + dgemm_("N", "N", &m2, &i__1, &m1, &c_b19, &t[i1 + t_dim1], ldt, &a[i1 + * a_dim1 + 1], lda, &c_b7, &a[i1 + i1 * a_dim1], lda); + + dtrmm_("R", "U", "N", "U", &m2, &m1, &c_b7, &a[a_offset], lda, &t[i1 + + t_dim1], ldt); + + i__1 = m2; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = m1; + for (j = 1; j <= i__2; ++j) { + a[i__ + m1 + j * a_dim1] -= t[i__ + m1 + j * t_dim1]; + t[i__ + m1 + j * t_dim1] = 0.; + } + } + +/* Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H */ + + i__1 = *n - m1; + dgelqt3_(&m2, &i__1, &a[i1 + i1 * a_dim1], lda, &t[i1 + i1 * t_dim1], + ldt, &iinfo); + +/* Compute T3 = T(J1:N1,1:N) = -T1 Y1^H Y2 T2 */ + + i__1 = m2; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = m1; + for (j = 1; j <= i__2; ++j) { + t[j + (i__ + m1) * t_dim1] = a[j + (i__ + m1) * a_dim1]; + } + } + + dtrmm_("R", "U", "T", "U", &m1, &m2, &c_b7, &a[i1 + i1 * a_dim1], lda, + &t[i1 * t_dim1 + 1], ldt); + + i__1 = *n - *m; + dgemm_("N", "T", &m1, &m2, &i__1, &c_b7, &a[j1 * a_dim1 + 1], lda, &a[ + i1 + j1 * a_dim1], lda, &c_b7, &t[i1 * t_dim1 + 1], ldt); + + dtrmm_("L", "U", "N", "N", &m1, &m2, &c_b19, &t[t_offset], ldt, &t[i1 + * t_dim1 + 1], ldt); + + dtrmm_("R", "U", "N", "N", &m1, &m2, &c_b7, &t[i1 + i1 * t_dim1], ldt, + &t[i1 * t_dim1 + 1], ldt); + + + +/* Y = (Y1,Y2); L = [ L1 0 ]; T = [T1 T3] */ +/* [ A(1:N1,J1:N) L2 ] [ 0 T2] */ + + } + + return 0; + +/* End of DGELQT3 */ + +} /* dgelqt3_ */ + diff --git a/lapack-netlib/SRC/dgels.c b/lapack-netlib/SRC/dgels.c new file mode 100644 index 000000000..34fffd2ba --- /dev/null +++ b/lapack-netlib/SRC/dgels.c @@ -0,0 +1,956 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGELS solves overdetermined or underdetermined systems for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGELS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELS solves overdetermined or underdetermined real linear systems */ +/* > involving an M-by-N matrix A, or its transpose, using a QR or LQ */ +/* > factorization of A. It is assumed that A has full rank. */ +/* > */ +/* > The following options are provided: */ +/* > */ +/* > 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A*X ||. */ +/* > */ +/* > 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* > an underdetermined system A * X = B. */ +/* > */ +/* > 3. If TRANS = 'T' and m >= n: find the minimum norm solution of */ +/* > an underdetermined system A**T * X = B. */ +/* > */ +/* > 4. If TRANS = 'T' and m < n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A**T * X ||. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': the linear system involves A; */ +/* > = 'T': the linear system involves A**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of the matrices B and X. NRHS >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if M >= N, A is overwritten by details of its QR */ +/* > factorization as returned by DGEQRF; */ +/* > if M < N, A is overwritten by details of its LQ */ +/* > factorization as returned by DGELQF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the matrix B of right hand side vectors, stored */ +/* > columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* > if TRANS = 'T'. */ +/* > On exit, if INFO = 0, B is overwritten by the solution */ +/* > vectors, stored columnwise: */ +/* > if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* > squares solution vectors; the residual sum of squares for the */ +/* > solution in each column is given by the sum of squares of */ +/* > elements N+1 to M in that column; */ +/* > if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m >= n, rows 1 to M of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m < n, rows 1 to M of B contain the */ +/* > least squares solution vectors; the residual sum of squares */ +/* > for the solution in each column is given by the sum of */ +/* > squares of elements M+1 to N in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= MAX(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS ) ). */ +/* > For optimal performance, */ +/* > LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS )*NB ). */ +/* > where MN = f2cmin(M,N) and NB is the optimum block size. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element of the */ +/* > triangular factor of A is zero, so that A does not have */ +/* > full rank; the least squares solution could not be */ +/* > computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int dgels_(char *trans, integer *m, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + doublereal anrm, bnrm; + integer brow; + logical tpsd; + integer i__, j, iascl, ibscl; + extern logical lsame_(char *, char *); + integer wsize; + doublereal rwork[1]; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + integer nb; + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + integer mn; + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), dlaset_(char *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer scllen; + doublereal bignum; + extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), + dormqr_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + doublereal smlnum; + logical lquery; + extern /* Subroutine */ int dtrtrs_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + mn = f2cmin(*m,*n); + lquery = *lwork == -1; + if (! (lsame_(trans, "N") || lsame_(trans, "T"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs); + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -10; + } + } + } + +/* Figure out optimal block size */ + + if (*info == 0 || *info == -10) { + + tpsd = TRUE_; + if (lsame_(trans, "N")) { + tpsd = FALSE_; + } + + if (*m >= *n) { + nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + if (tpsd) { +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__1, "DORMQR", "LN", m, nrhs, n, & + c_n1, (ftnlen)6, (ftnlen)2); + nb = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__1, "DORMQR", "LT", m, nrhs, n, & + c_n1, (ftnlen)6, (ftnlen)2); + nb = f2cmax(i__1,i__2); + } + } else { + nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + if (tpsd) { +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__1, "DORMLQ", "LT", n, nrhs, m, & + c_n1, (ftnlen)6, (ftnlen)2); + nb = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__1, "DORMLQ", "LN", n, nrhs, m, & + c_n1, (ftnlen)6, (ftnlen)2); + nb = f2cmax(i__1,i__2); + } + } + +/* Computing MAX */ + i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs) * nb; + wsize = f2cmax(i__1,i__2); + work[1] = (doublereal) wsize; + + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELS ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*nrhs) == 0) { + i__1 = f2cmax(*m,*n); + dlaset_("Full", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb); + return 0; + } + +/* Get machine parameters */ + + smlnum = dlamch_("S") / dlamch_("P"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, rwork); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + dlaset_("F", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb); + goto L50; + } + + brow = *m; + if (tpsd) { + brow = *n; + } + bnrm = dlange_("M", &brow, nrhs, &b[b_offset], ldb, rwork); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + dlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* compute QR factorization of A */ + + i__1 = *lwork - mn; + dgeqrf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info) + ; + +/* workspace at least N, optimally N*NB */ + + if (! tpsd) { + +/* Least-Squares Problem f2cmin || A * X - B || */ + +/* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */ + + i__1 = *lwork - mn; + dormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &work[ + 1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + +/* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + dtrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *n; + + } else { + +/* Underdetermined system of equations A**T * X = B */ + +/* B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */ + + dtrtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* B(N+1:M,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + +/* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */ + + i__1 = *lwork - mn; + dormqr_("Left", "No transpose", m, nrhs, n, &a[a_offset], lda, & + work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + + scllen = *m; + + } + + } else { + +/* Compute LQ factorization of A */ + + i__1 = *lwork - mn; + dgelqf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info) + ; + +/* workspace at least M, optimally M*NB. */ + + if (! tpsd) { + +/* underdetermined system of equations A * X = B */ + +/* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + dtrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* B(M+1:N,1:NRHS) = 0 */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + +/* B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) */ + + i__1 = *lwork - mn; + dormlq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &work[ + 1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + + scllen = *n; + + } else { + +/* overdetermined system f2cmin || A**T * X - B || */ + +/* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */ + + i__1 = *lwork - mn; + dormlq_("Left", "No transpose", n, nrhs, m, &a[a_offset], lda, & + work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + +/* B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */ + + dtrtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ + + if (iascl == 1) { + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (iascl == 2) { + dlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + if (ibscl == 1) { + dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (ibscl == 2) { + dlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + +L50: + work[1] = (doublereal) wsize; + + return 0; + +/* End of DGELS */ + +} /* dgels_ */ + diff --git a/lapack-netlib/SRC/dgelsd.c b/lapack-netlib/SRC/dgelsd.c new file mode 100644 index 000000000..ee230d054 --- /dev/null +++ b/lapack-netlib/SRC/dgelsd.c @@ -0,0 +1,1153 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGELSD computes the minimum-norm solution to a linear least squares problem for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGELSD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, */ +/* WORK, LWORK, IWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */ +/* DOUBLE PRECISION RCOND */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELSD computes the minimum-norm solution to a real linear least */ +/* > squares problem: */ +/* > minimize 2-norm(| b - A*x |) */ +/* > using the singular value decomposition (SVD) of A. A is an M-by-N */ +/* > matrix which may be rank-deficient. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > */ +/* > The problem is solved in three steps: */ +/* > (1) Reduce the coefficient matrix A to bidiagonal form with */ +/* > Householder transformations, reducing the original problem */ +/* > into a "bidiagonal least squares problem" (BLS) */ +/* > (2) Solve the BLS using a divide and conquer approach. */ +/* > (3) Apply back all the Householder transformations to solve */ +/* > the original least squares problem. */ +/* > */ +/* > The effective rank of A is determined by treating as zero those */ +/* > singular values which are less than RCOND times the largest singular */ +/* > value. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, A has been destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the M-by-NRHS right hand side matrix B. */ +/* > On exit, B is overwritten by the N-by-NRHS solution */ +/* > matrix X. If m >= n and RANK = n, the residual */ +/* > sum-of-squares for the solution in the i-th column is given */ +/* > by the sum of squares of elements n+1:m in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,f2cmax(M,N)). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The singular values of A in decreasing order. */ +/* > The condition number of A in the 2-norm = S(1)/S(f2cmin(m,n)). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > RCOND is used to determine the effective rank of A. */ +/* > Singular values S(i) <= RCOND*S(1) are treated as zero. */ +/* > If RCOND < 0, machine precision is used instead. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RANK */ +/* > \verbatim */ +/* > RANK is INTEGER */ +/* > The effective rank of A, i.e., the number of singular values */ +/* > which are greater than RCOND*S(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK must be at least 1. */ +/* > The exact minimum amount of workspace needed depends on M, */ +/* > N and NRHS. As long as LWORK is at least */ +/* > 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, */ +/* > if M is greater than or equal to N or */ +/* > 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, */ +/* > if M is less than N, the code will execute correctly. */ +/* > SMLSIZ is returned by ILAENV and is equal to the maximum */ +/* > size of the subproblems at the bottom of the computation */ +/* > tree (usually about 25), and */ +/* > NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > LIWORK >= f2cmax(1, 3 * MINMN * NLVL + 11 * MINMN), */ +/* > where MINMN = MIN( M,N ). */ +/* > On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: the algorithm for computing the SVD failed to converge; */ +/* > if INFO = i, i off-diagonal elements of an intermediate */ +/* > bidiagonal form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup doubleGEsolve */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Ren-Cang Li, Computer Science Division, University of */ +/* > California at Berkeley, USA \n */ +/* > Osni Marques, LBNL/NERSC, USA \n */ + +/* ===================================================================== */ +/* Subroutine */ int dgelsd_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + doublereal anrm, bnrm; + integer itau, nlvl, iascl, ibscl; + doublereal sfmin; + integer minmn, maxmn, itaup, itauq, mnthr, nwork; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + integer ie, il; + extern /* Subroutine */ int dgebrd_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + extern doublereal dlamch_(char *); + integer mm; + extern doublereal dlange_(char *, integer *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlalsd_(char *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *), dlascl_(char *, + integer *, integer *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *), dgeqrf_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *), xerbla_(char *, + integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + doublereal bignum; + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + integer wlalsd; + extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer ldwork; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer liwork, minwrk, maxwrk; + doublereal smlnum; + logical lquery; + integer smlsiz; + doublereal eps; + + +/* -- LAPACK driver routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = f2cmin(*m,*n); + maxmn = f2cmax(*m,*n); + mnthr = ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( + ftnlen)1); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,maxmn)) { + *info = -7; + } + + smlsiz = ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* Compute workspace. */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + minwrk = 1; + liwork = 1; + minmn = f2cmax(1,minmn); +/* Computing MAX */ + i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) / + log(2.)) + 1; + nlvl = f2cmax(i__1,0); + + if (*info == 0) { + maxwrk = 0; + liwork = minmn * 3 * nlvl + minmn * 11; + mm = *m; + if (*m >= *n && *m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns. */ + + mm = *n; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "DORMQR", "LT", + m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = f2cmax(i__1,i__2); + } + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined. */ + +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "DGEBRD" + , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "DORMBR", + "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "DORMBR", + "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = f2cmax(i__1,i__2); +/* Computing 2nd power */ + i__1 = smlsiz + 1; + wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * * + nrhs + i__1 * i__1; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + wlalsd; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = f2cmax(i__1,i__2), + i__2 = *n * 3 + wlalsd; + minwrk = f2cmax(i__1,i__2); + } + if (*n > *m) { +/* Computing 2nd power */ + i__1 = smlsiz + 1; + wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * * + nrhs + i__1 * i__1; + if (*n >= mnthr) { + +/* Path 2a - underdetermined, with many more columns */ +/* than rows. */ + + maxwrk = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, + &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * + ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& + c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * + ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1, ( + ftnlen)6, (ftnlen)3); + maxwrk = f2cmax(i__1,i__2); + if (*nrhs > 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = f2cmax(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "DORMLQ", + "LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd; + maxwrk = f2cmax(i__1,i__2); +/* XXX: Ensure the Path 2a case below is triggered. The workspace */ +/* calculation should use queries for all routines eventually. */ +/* Computing MAX */ +/* Computing MAX */ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = f2cmax(i__3,i__4), i__3 = + f2cmax(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + f2cmax(i__3,i__4); + maxwrk = f2cmax(i__1,i__2); + } else { + +/* Path 2 - remaining underdetermined cases. */ + + maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "DGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "DORMBR" + , "QLT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR", + "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + wlalsd; + maxwrk = f2cmax(i__1,i__2); + } +/* Computing MAX */ + i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = f2cmax(i__1,i__2), + i__2 = *m * 3 + wlalsd; + minwrk = f2cmax(i__1,i__2); + } + minwrk = f2cmin(minwrk,maxwrk); + work[1] = (doublereal) maxwrk; + iwork[1] = liwork; + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELSD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + goto L10; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters. */ + + eps = dlamch_("P"); + sfmin = dlamch_("S"); + smlnum = sfmin / eps; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A if f2cmax entry outside range [SMLNUM,BIGNUM]. */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[b_offset], ldb); + dlaset_("F", &minmn, &c__1, &c_b82, &c_b82, &s[1], &c__1); + *rank = 0; + goto L10; + } + +/* Scale B if f2cmax entry outside range [SMLNUM,BIGNUM]. */ + + bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* If M < N make sure certain entries of B are zero. */ + + if (*m < *n) { + i__1 = *n - *m; + dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb); + } + +/* Overdetermined case. */ + + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined. */ + + mm = *m; + if (*m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns. */ + + mm = *n; + itau = 1; + nwork = itau + *n; + +/* Compute A=Q*R. */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + +/* Multiply B by transpose(Q). */ +/* (Workspace: need N+NRHS, prefer N+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below R. */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_("L", &i__1, &i__2, &c_b82, &c_b82, &a[a_dim1 + 2], + lda); + } + } + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in A. */ +/* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */ + + i__1 = *lwork - nwork + 1; + dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors of R. */ +/* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + dlalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, + rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of R. */ + + i__1 = *lwork - nwork + 1; + dormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & + b[b_offset], ldb, &work[nwork], &i__1, info); + + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *m, i__2 = (*m << 1) - 4, i__1 = f2cmax(i__1,i__2), i__1 = f2cmax( + i__1,*nrhs), i__2 = *n - *m * 3, i__1 = f2cmax(i__1,i__2); + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + f2cmax(i__1,wlalsd)) { + +/* Path 2a - underdetermined, with many more columns than rows */ +/* and sufficient workspace for an efficient algorithm. */ + + ldwork = *m; +/* Computing MAX */ +/* Computing MAX */ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = f2cmax(i__3,i__4), i__3 = + f2cmax(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = (*m << 2) + *m * *lda + f2cmax(i__3,i__4), i__2 = *m * *lda + + *m + *m * *nrhs, i__1 = f2cmax(i__1,i__2), i__2 = (*m << 2) + + *m * *lda + wlalsd; + if (*lwork >= f2cmax(i__1,i__2)) { + ldwork = *lda; + } + itau = 1; + nwork = *m + 1; + +/* Compute A=L*Q. */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + il = nwork; + +/* Copy L to WORK(IL), zeroing out above its diagonal. */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_("U", &i__1, &i__2, &c_b82, &c_b82, &work[il + ldwork], & + ldwork); + ie = il + ldwork * *m; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in WORK(IL). */ +/* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors of L. */ +/* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ + itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + dlalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of L. */ + + i__1 = *lwork - nwork + 1; + dormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ + itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below first M rows of B. */ + + i__1 = *n - *m; + dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], + ldb); + nwork = itau + *m; + +/* Multiply transpose(Q) by B. */ +/* (Workspace: need M+NRHS, prefer M+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + + } else { + +/* Path 2 - remaining underdetermined cases. */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize A. */ +/* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors. */ +/* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + dlalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of A. */ + + i__1 = *lwork - nwork + 1; + dormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + + } + } + +/* Undo scaling. */ + + if (iascl == 1) { + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } else if (iascl == 2) { + dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (ibscl == 1) { + dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L10: + work[1] = (doublereal) maxwrk; + iwork[1] = liwork; + return 0; + +/* End of DGELSD */ + +} /* dgelsd_ */ + diff --git a/lapack-netlib/SRC/dgelss.c b/lapack-netlib/SRC/dgelss.c new file mode 100644 index 000000000..7d25a55ec --- /dev/null +++ b/lapack-netlib/SRC/dgelss.c @@ -0,0 +1,1318 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGELSS solves overdetermined or underdetermined systems for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGELSS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, */ +/* WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */ +/* DOUBLE PRECISION RCOND */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELSS computes the minimum norm solution to a real linear least */ +/* > squares problem: */ +/* > */ +/* > Minimize 2-norm(| b - A*x |). */ +/* > */ +/* > using the singular value decomposition (SVD) of A. A is an M-by-N */ +/* > matrix which may be rank-deficient. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix */ +/* > X. */ +/* > */ +/* > The effective rank of A is determined by treating as zero those */ +/* > singular values which are less than RCOND times the largest singular */ +/* > value. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the first f2cmin(m,n) rows of A are overwritten with */ +/* > its right singular vectors, stored rowwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the M-by-NRHS right hand side matrix B. */ +/* > On exit, B is overwritten by the N-by-NRHS solution */ +/* > matrix X. If m >= n and RANK = n, the residual */ +/* > sum-of-squares for the solution in the i-th column is given */ +/* > by the sum of squares of elements n+1:m in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,f2cmax(M,N)). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The singular values of A in decreasing order. */ +/* > The condition number of A in the 2-norm = S(1)/S(f2cmin(m,n)). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > RCOND is used to determine the effective rank of A. */ +/* > Singular values S(i) <= RCOND*S(1) are treated as zero. */ +/* > If RCOND < 0, machine precision is used instead. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RANK */ +/* > \verbatim */ +/* > RANK is INTEGER */ +/* > The effective rank of A, i.e., the number of singular values */ +/* > which are greater than RCOND*S(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= 1, and also: */ +/* > LWORK >= 3*f2cmin(M,N) + f2cmax( 2*f2cmin(M,N), f2cmax(M,N), NRHS ) */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: the algorithm for computing the SVD failed to converge; */ +/* > if INFO = i, i off-diagonal elements of an intermediate */ +/* > bidiagonal form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int dgelss_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + doublereal d__1; + + /* Local variables */ + doublereal anrm, bnrm; + integer itau, lwork_dgebrd__, lwork_dgelqf__, lwork_dgeqrf__, + lwork_dorgbr__, lwork_dormbr__, i__, lwork_dormlq__, + lwork_dormqr__; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer iascl, ibscl; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), drscl_(integer *, + doublereal *, doublereal *, integer *); + integer chunk; + doublereal sfmin; + integer minmn; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer maxmn, itaup, itauq, mnthr, iwork; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + integer bl, ie, il; + extern /* Subroutine */ int dgebrd_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + extern doublereal dlamch_(char *); + integer mm; + extern doublereal dlange_(char *, integer *, integer *, doublereal *, + integer *, doublereal *); + integer bdspac; + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), dlacpy_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *, ftnlen), dbdsqr_(char *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dorgbr_(char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *); + doublereal bignum; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), dormlq_(char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + integer ldwork; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer minwrk, maxwrk; + doublereal smlnum; + logical lquery; + doublereal dum[1], eps, thr; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --s; + --work; + + /* Function Body */ + *info = 0; + minmn = f2cmin(*m,*n); + maxmn = f2cmax(*m,*n); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,maxmn)) { + *info = -7; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (minmn > 0) { + mm = *m; + mnthr = ilaenv_(&c__6, "DGELSS", " ", m, n, nrhs, &c_n1, (ftnlen) + 6, (ftnlen)1); + if (*m >= *n && *m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than */ +/* columns */ + +/* Compute space needed for DGEQRF */ + dgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info); + lwork_dgeqrf__ = (integer) dum[0]; +/* Compute space needed for DORMQR */ + dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, dum, &b[ + b_offset], ldb, dum, &c_n1, info); + lwork_dormqr__ = (integer) dum[0]; + mm = *n; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + lwork_dgeqrf__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + lwork_dormqr__; + maxwrk = f2cmax(i__1,i__2); + } + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined */ + +/* Compute workspace needed for DBDSQR */ + +/* Computing MAX */ + i__1 = 1, i__2 = *n * 5; + bdspac = f2cmax(i__1,i__2); +/* Compute space needed for DGEBRD */ + dgebrd_(&mm, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, + &c_n1, info); + lwork_dgebrd__ = (integer) dum[0]; +/* Compute space needed for DORMBR */ + dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, dum, & + b[b_offset], ldb, dum, &c_n1, info); + lwork_dormbr__ = (integer) dum[0]; +/* Compute space needed for DORGBR */ + dorgbr_("P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, + info); + lwork_dorgbr__ = (integer) dum[0]; +/* Compute total workspace needed */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + lwork_dgebrd__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + lwork_dormbr__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + lwork_dorgbr__; + maxwrk = f2cmax(i__1,i__2); + maxwrk = f2cmax(maxwrk,bdspac); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * *nrhs; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = f2cmax(i__1, + i__2); + minwrk = f2cmax(i__1,bdspac); + maxwrk = f2cmax(minwrk,maxwrk); + } + if (*n > *m) { + +/* Compute workspace needed for DBDSQR */ + +/* Computing MAX */ + i__1 = 1, i__2 = *m * 5; + bdspac = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *n, i__1 = f2cmax(i__1, + i__2); + minwrk = f2cmax(i__1,bdspac); + if (*n >= mnthr) { + +/* Path 2a - underdetermined, with many more columns */ +/* than rows */ + +/* Compute space needed for DGELQF */ + dgelqf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info); + lwork_dgelqf__ = (integer) dum[0]; +/* Compute space needed for DGEBRD */ + dgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, + dum, &c_n1, info); + lwork_dgebrd__ = (integer) dum[0]; +/* Compute space needed for DORMBR */ + dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, dum, + &b[b_offset], ldb, dum, &c_n1, info); + lwork_dormbr__ = (integer) dum[0]; +/* Compute space needed for DORGBR */ + dorgbr_("P", m, m, m, &a[a_offset], lda, dum, dum, &c_n1, + info); + lwork_dorgbr__ = (integer) dum[0]; +/* Compute space needed for DORMLQ */ + dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, dum, &b[ + b_offset], ldb, dum, &c_n1, info); + lwork_dormlq__ = (integer) dum[0]; +/* Compute total workspace needed */ + maxwrk = *m + lwork_dgelqf__; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + + lwork_dgebrd__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + + lwork_dormbr__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + + lwork_dorgbr__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + bdspac; + maxwrk = f2cmax(i__1,i__2); + if (*nrhs > 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = f2cmax(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m + lwork_dormlq__; + maxwrk = f2cmax(i__1,i__2); + } else { + +/* Path 2 - underdetermined */ + +/* Compute space needed for DGEBRD */ + dgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, + dum, &c_n1, info); + lwork_dgebrd__ = (integer) dum[0]; +/* Compute space needed for DORMBR */ + dormbr_("Q", "L", "T", m, nrhs, m, &a[a_offset], lda, dum, + &b[b_offset], ldb, dum, &c_n1, info); + lwork_dormbr__ = (integer) dum[0]; +/* Compute space needed for DORGBR */ + dorgbr_("P", m, n, m, &a[a_offset], lda, dum, dum, &c_n1, + info); + lwork_dorgbr__ = (integer) dum[0]; + maxwrk = *m * 3 + lwork_dgebrd__; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + lwork_dormbr__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + lwork_dorgbr__; + maxwrk = f2cmax(i__1,i__2); + maxwrk = f2cmax(maxwrk,bdspac); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * *nrhs; + maxwrk = f2cmax(i__1,i__2); + } + } + maxwrk = f2cmax(minwrk,maxwrk); + } + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELSS", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters */ + + eps = dlamch_("P"); + sfmin = dlamch_("S"); + smlnum = sfmin / eps; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + dlaset_("F", &i__1, nrhs, &c_b46, &c_b46, &b[b_offset], ldb); + dlaset_("F", &minmn, &c__1, &c_b46, &c_b46, &s[1], &minmn); + *rank = 0; + goto L70; + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* Overdetermined case */ + + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined */ + + mm = *m; + if (*m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns */ + + mm = *n; + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__1 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__1, + info); + +/* Multiply B by transpose(Q) */ +/* (Workspace: need N+NRHS, prefer N+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[iwork], &i__1, info); + +/* Zero out below R */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_("L", &i__1, &i__2, &c_b46, &c_b46, &a[a_dim1 + 2], + lda); + } + } + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */ + + i__1 = *lwork - iwork + 1; + dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors of R */ +/* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + &b[b_offset], ldb, &work[iwork], &i__1, info); + +/* Generate right bidiagonalizing vectors of R in A */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__1 = *lwork - iwork + 1; + dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], & + i__1, info); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration */ +/* multiply B by transpose of left singular vectors */ +/* compute right singular vectors in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", n, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, + dum, &c__1, &b[b_offset], ldb, &work[iwork], info); + if (*info != 0) { + goto L70; + } + +/* Multiply B by reciprocals of singular values */ + +/* Computing MAX */ + d__1 = *rcond * s[1]; + thr = f2cmax(d__1,sfmin); + if (*rcond < 0.) { +/* Computing MAX */ + d__1 = eps * s[1]; + thr = f2cmax(d__1,sfmin); + } + *rank = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (s[i__] > thr) { + drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); + ++(*rank); + } else { + dlaset_("F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], + ldb); + } +/* L10: */ + } + +/* Multiply B by right singular vectors */ +/* (Workspace: need N, prefer N*NRHS) */ + + if (*lwork >= *ldb * *nrhs && *nrhs > 1) { + dgemm_("T", "N", n, nrhs, n, &c_b79, &a[a_offset], lda, &b[ + b_offset], ldb, &c_b46, &work[1], ldb); + dlacpy_("G", n, nrhs, &work[1], ldb, &b[b_offset], ldb) + ; + } else if (*nrhs > 1) { + chunk = *lwork / *n; + i__1 = *nrhs; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = *nrhs - i__ + 1; + bl = f2cmin(i__3,chunk); + dgemm_("T", "N", n, &bl, n, &c_b79, &a[a_offset], lda, &b[i__ + * b_dim1 + 1], ldb, &c_b46, &work[1], n); + dlacpy_("G", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb); +/* L20: */ + } + } else { + dgemv_("T", n, n, &c_b79, &a[a_offset], lda, &b[b_offset], &c__1, + &c_b46, &work[1], &c__1); + dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1); + } + + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__2 = *m, i__1 = (*m << 1) - 4, i__2 = f2cmax(i__2,i__1), i__2 = f2cmax( + i__2,*nrhs), i__1 = *n - *m * 3; + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + f2cmax(i__2,i__1)) { + +/* Path 2a - underdetermined, with many more columns than rows */ +/* and sufficient workspace for an efficient algorithm */ + + ldwork = *m; +/* Computing MAX */ +/* Computing MAX */ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = f2cmax(i__3,i__4), i__3 = + f2cmax(i__3,*nrhs), i__4 = *n - *m * 3; + i__2 = (*m << 2) + *m * *lda + f2cmax(i__3,i__4), i__1 = *m * *lda + + *m + *m * *nrhs; + if (*lwork >= f2cmax(i__2,i__1)) { + ldwork = *lda; + } + itau = 1; + iwork = *m + 1; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__2, + info); + il = iwork; + +/* Copy L to WORK(IL), zeroing out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_("U", &i__2, &i__1, &c_b46, &c_b46, &work[il + ldwork], & + ldwork); + ie = il + ldwork * *m; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IL) */ +/* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], + &work[itaup], &work[iwork], &i__2, info); + +/* Multiply B by transpose of left bidiagonalizing vectors of L */ +/* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ + itauq], &b[b_offset], ldb, &work[iwork], &i__2, info); + +/* Generate right bidiagonalizing vectors of R in WORK(IL) */ +/* (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", m, m, m, &work[il], &ldwork, &work[itaup], &work[ + iwork], &i__2, info); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, */ +/* computing right singular vectors of L in WORK(IL) and */ +/* multiplying B by transpose of left singular vectors */ +/* (Workspace: need M*M+M+BDSPAC) */ + + dbdsqr_("U", m, m, &c__0, nrhs, &s[1], &work[ie], &work[il], & + ldwork, &a[a_offset], lda, &b[b_offset], ldb, &work[iwork] + , info); + if (*info != 0) { + goto L70; + } + +/* Multiply B by reciprocals of singular values */ + +/* Computing MAX */ + d__1 = *rcond * s[1]; + thr = f2cmax(d__1,sfmin); + if (*rcond < 0.) { +/* Computing MAX */ + d__1 = eps * s[1]; + thr = f2cmax(d__1,sfmin); + } + *rank = 0; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + if (s[i__] > thr) { + drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); + ++(*rank); + } else { + dlaset_("F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1] + , ldb); + } +/* L30: */ + } + iwork = ie; + +/* Multiply B by right singular vectors of L in WORK(IL) */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) */ + + if (*lwork >= *ldb * *nrhs + iwork - 1 && *nrhs > 1) { + dgemm_("T", "N", m, nrhs, m, &c_b79, &work[il], &ldwork, &b[ + b_offset], ldb, &c_b46, &work[iwork], ldb); + dlacpy_("G", m, nrhs, &work[iwork], ldb, &b[b_offset], ldb); + } else if (*nrhs > 1) { + chunk = (*lwork - iwork + 1) / *m; + i__2 = *nrhs; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *nrhs - i__ + 1; + bl = f2cmin(i__3,chunk); + dgemm_("T", "N", m, &bl, m, &c_b79, &work[il], &ldwork, & + b[i__ * b_dim1 + 1], ldb, &c_b46, &work[iwork], m); + dlacpy_("G", m, &bl, &work[iwork], m, &b[i__ * b_dim1 + 1] + , ldb); +/* L40: */ + } + } else { + dgemv_("T", m, m, &c_b79, &work[il], &ldwork, &b[b_dim1 + 1], + &c__1, &c_b46, &work[iwork], &c__1); + dcopy_(m, &work[iwork], &c__1, &b[b_dim1 + 1], &c__1); + } + +/* Zero out below first M rows of B */ + + i__1 = *n - *m; + dlaset_("F", &i__1, nrhs, &c_b46, &c_b46, &b[*m + 1 + b_dim1], + ldb); + iwork = itau + *m; + +/* Multiply transpose(Q) by B */ +/* (Workspace: need M+NRHS, prefer M+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[iwork], &i__1, info); + + } else { + +/* Path 2 - remaining underdetermined cases */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize A */ +/* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ + + i__1 = *lwork - iwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors */ +/* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] + , &b[b_offset], ldb, &work[iwork], &i__1, info); + +/* Generate right bidiagonalizing vectors in A */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__1 = *lwork - iwork + 1; + dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + iwork], &i__1, info); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, */ +/* computing right singular vectors of A in A and */ +/* multiplying B by transpose of left singular vectors */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("L", m, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], + lda, dum, &c__1, &b[b_offset], ldb, &work[iwork], info); + if (*info != 0) { + goto L70; + } + +/* Multiply B by reciprocals of singular values */ + +/* Computing MAX */ + d__1 = *rcond * s[1]; + thr = f2cmax(d__1,sfmin); + if (*rcond < 0.) { +/* Computing MAX */ + d__1 = eps * s[1]; + thr = f2cmax(d__1,sfmin); + } + *rank = 0; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (s[i__] > thr) { + drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); + ++(*rank); + } else { + dlaset_("F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1] + , ldb); + } +/* L50: */ + } + +/* Multiply B by right singular vectors of A */ +/* (Workspace: need N, prefer N*NRHS) */ + + if (*lwork >= *ldb * *nrhs && *nrhs > 1) { + dgemm_("T", "N", n, nrhs, m, &c_b79, &a[a_offset], lda, &b[ + b_offset], ldb, &c_b46, &work[1], ldb); + dlacpy_("F", n, nrhs, &work[1], ldb, &b[b_offset], ldb); + } else if (*nrhs > 1) { + chunk = *lwork / *n; + i__1 = *nrhs; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *nrhs - i__ + 1; + bl = f2cmin(i__3,chunk); + dgemm_("T", "N", n, &bl, m, &c_b79, &a[a_offset], lda, &b[ + i__ * b_dim1 + 1], ldb, &c_b46, &work[1], n); + dlacpy_("F", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], + ldb); +/* L60: */ + } + } else { + dgemv_("T", m, n, &c_b79, &a[a_offset], lda, &b[b_offset], & + c__1, &c_b46, &work[1], &c__1); + dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1); + } + } + } + +/* Undo scaling */ + + if (iascl == 1) { + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } else if (iascl == 2) { + dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (ibscl == 1) { + dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L70: + work[1] = (doublereal) maxwrk; + return 0; + +/* End of DGELSS */ + +} /* dgelss_ */ + diff --git a/lapack-netlib/SRC/dgelsy.c b/lapack-netlib/SRC/dgelsy.c new file mode 100644 index 000000000..0a48ab1d5 --- /dev/null +++ b/lapack-netlib/SRC/dgelsy.c @@ -0,0 +1,945 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGELSY solves overdetermined or underdetermined systems for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGELSY + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, */ +/* WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */ +/* DOUBLE PRECISION RCOND */ +/* INTEGER JPVT( * ) */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELSY computes the minimum-norm solution to a real linear least */ +/* > squares problem: */ +/* > minimize || A * X - B || */ +/* > using a complete orthogonal factorization of A. A is an M-by-N */ +/* > matrix which may be rank-deficient. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > */ +/* > The routine first computes a QR factorization with column pivoting: */ +/* > A * P = Q * [ R11 R12 ] */ +/* > [ 0 R22 ] */ +/* > with R11 defined as the largest leading submatrix whose estimated */ +/* > condition number is less than 1/RCOND. The order of R11, RANK, */ +/* > is the effective rank of A. */ +/* > */ +/* > Then, R22 is considered to be negligible, and R12 is annihilated */ +/* > by orthogonal transformations from the right, arriving at the */ +/* > complete orthogonal factorization: */ +/* > A * P = Q * [ T11 0 ] * Z */ +/* > [ 0 0 ] */ +/* > The minimum-norm solution is then */ +/* > X = P * Z**T [ inv(T11)*Q1**T*B ] */ +/* > [ 0 ] */ +/* > where Q1 consists of the first RANK columns of Q. */ +/* > */ +/* > This routine is basically identical to the original xGELSX except */ +/* > three differences: */ +/* > o The call to the subroutine xGEQPF has been substituted by the */ +/* > the call to the subroutine xGEQP3. This subroutine is a Blas-3 */ +/* > version of the QR factorization with column pivoting. */ +/* > o Matrix B (the right hand side) is updated with Blas-3. */ +/* > o The permutation of matrix B (the right hand side) is faster and */ +/* > more simple. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, A has been overwritten by details of its */ +/* > complete orthogonal factorization. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the M-by-NRHS right hand side matrix B. */ +/* > On exit, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] JPVT */ +/* > \verbatim */ +/* > JPVT is INTEGER array, dimension (N) */ +/* > On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */ +/* > to the front of AP, otherwise column i is a free column. */ +/* > On exit, if JPVT(i) = k, then the i-th column of AP */ +/* > was the k-th column of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > RCOND is used to determine the effective rank of A, which */ +/* > is defined as the order of the largest leading triangular */ +/* > submatrix R11 in the QR factorization with pivoting of A, */ +/* > whose estimated condition number < 1/RCOND. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RANK */ +/* > \verbatim */ +/* > RANK is INTEGER */ +/* > The effective rank of A, i.e., the order of the submatrix */ +/* > R11. This is the same as the order of the submatrix T11 */ +/* > in the complete orthogonal factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > The unblocked strategy requires that: */ +/* > LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), */ +/* > where MN = f2cmin( M, N ). */ +/* > The block algorithm requires that: */ +/* > LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), */ +/* > where NB is an upper bound on the blocksize returned */ +/* > by ILAENV for the routines DGEQP3, DTZRZF, STZRQF, DORMQR, */ +/* > and DORMRZ. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: If INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEsolve */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA \n */ +/* > E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n */ +/* > G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgelsy_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + jpvt, doublereal *rcond, integer *rank, doublereal *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + doublereal anrm, bnrm, smin, smax; + integer i__, j, iascl, ibscl; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer ismin, ismax; + doublereal c1, c2; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), dlaic1_( + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *); + doublereal wsize, s1, s2; + extern /* Subroutine */ int dgeqp3_(integer *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, integer *, + integer *), dlabad_(doublereal *, doublereal *); + integer nb; + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + integer mn; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + doublereal bignum; + integer lwkmin, nb1, nb2, nb3, nb4; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + doublereal sminpr, smaxpr, smlnum; + extern /* Subroutine */ int dormrz_(char *, char *, integer *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int dtzrzf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --jpvt; + --work; + + /* Function Body */ + mn = f2cmin(*m,*n); + ismin = mn + 1; + ismax = (mn << 1) + 1; + +/* Test the input arguments. */ + + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -7; + } + } + +/* Figure out optimal block size */ + + if (*info == 0) { + if (mn == 0 || *nrhs == 0) { + lwkmin = 1; + lwkopt = 1; + } else { + nb1 = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb2 = ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb3 = ilaenv_(&c__1, "DORMQR", " ", m, n, nrhs, &c_n1, (ftnlen)6, + (ftnlen)1); + nb4 = ilaenv_(&c__1, "DORMRQ", " ", m, n, nrhs, &c_n1, (ftnlen)6, + (ftnlen)1); +/* Computing MAX */ + i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3); + nb = f2cmax(i__1,nb4); +/* Computing MAX */ + i__1 = mn << 1, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = mn + + *nrhs; + lwkmin = mn + f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = lwkmin, i__2 = mn + (*n << 1) + nb * (*n + 1), i__1 = f2cmax( + i__1,i__2), i__2 = (mn << 1) + nb * *nrhs; + lwkopt = f2cmax(i__1,i__2); + } + work[1] = (doublereal) lwkopt; + + if (*lwork < lwkmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELSY", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (mn == 0 || *nrhs == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters */ + + smlnum = dlamch_("S") / dlamch_("P"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax entries outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + dlaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb); + *rank = 0; + goto L70; + } + + bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* Compute QR factorization with column pivoting of A: */ +/* A * P = Q * R */ + + i__1 = *lwork - mn; + dgeqp3_(m, n, &a[a_offset], lda, &jpvt[1], &work[1], &work[mn + 1], &i__1, + info); + wsize = mn + work[mn + 1]; + +/* workspace: MN+2*N+NB*(N+1). */ +/* Details of Householder rotations stored in WORK(1:MN). */ + +/* Determine RANK using incremental condition estimation */ + + work[ismin] = 1.; + work[ismax] = 1.; + smax = (d__1 = a[a_dim1 + 1], abs(d__1)); + smin = smax; + if ((d__1 = a[a_dim1 + 1], abs(d__1)) == 0.) { + *rank = 0; + i__1 = f2cmax(*m,*n); + dlaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb); + goto L70; + } else { + *rank = 1; + } + +L10: + if (*rank < mn) { + i__ = *rank + 1; + dlaic1_(&c__2, rank, &work[ismin], &smin, &a[i__ * a_dim1 + 1], &a[ + i__ + i__ * a_dim1], &sminpr, &s1, &c1); + dlaic1_(&c__1, rank, &work[ismax], &smax, &a[i__ * a_dim1 + 1], &a[ + i__ + i__ * a_dim1], &smaxpr, &s2, &c2); + + if (smaxpr * *rcond <= sminpr) { + i__1 = *rank; + for (i__ = 1; i__ <= i__1; ++i__) { + work[ismin + i__ - 1] = s1 * work[ismin + i__ - 1]; + work[ismax + i__ - 1] = s2 * work[ismax + i__ - 1]; +/* L20: */ + } + work[ismin + *rank] = c1; + work[ismax + *rank] = c2; + smin = sminpr; + smax = smaxpr; + ++(*rank); + goto L10; + } + } + +/* workspace: 3*MN. */ + +/* Logically partition R = [ R11 R12 ] */ +/* [ 0 R22 ] */ +/* where R11 = R(1:RANK,1:RANK) */ + +/* [R11,R12] = [ T11, 0 ] * Y */ + + if (*rank < *n) { + i__1 = *lwork - (mn << 1); + dtzrzf_(rank, n, &a[a_offset], lda, &work[mn + 1], &work[(mn << 1) + + 1], &i__1, info); + } + +/* workspace: 2*MN. */ +/* Details of Householder rotations stored in WORK(MN+1:2*MN) */ + +/* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */ + + i__1 = *lwork - (mn << 1); + dormqr_("Left", "Transpose", m, nrhs, &mn, &a[a_offset], lda, &work[1], & + b[b_offset], ldb, &work[(mn << 1) + 1], &i__1, info); +/* Computing MAX */ + d__1 = wsize, d__2 = (mn << 1) + work[(mn << 1) + 1]; + wsize = f2cmax(d__1,d__2); + +/* workspace: 2*MN+NB*NRHS. */ + +/* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) */ + + dtrsm_("Left", "Upper", "No transpose", "Non-unit", rank, nrhs, &c_b54, & + a[a_offset], lda, &b[b_offset], ldb); + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *rank + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + +/* B(1:N,1:NRHS) := Y**T * B(1:N,1:NRHS) */ + + if (*rank < *n) { + i__1 = *n - *rank; + i__2 = *lwork - (mn << 1); + dormrz_("Left", "Transpose", n, nrhs, rank, &i__1, &a[a_offset], lda, + &work[mn + 1], &b[b_offset], ldb, &work[(mn << 1) + 1], &i__2, + info); + } + +/* workspace: 2*MN+NRHS. */ + +/* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[jpvt[i__]] = b[i__ + j * b_dim1]; +/* L50: */ + } + dcopy_(n, &work[1], &c__1, &b[j * b_dim1 + 1], &c__1); +/* L60: */ + } + +/* workspace: N. */ + +/* Undo scaling */ + + if (iascl == 1) { + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("U", &c__0, &c__0, &smlnum, &anrm, rank, rank, &a[a_offset], + lda, info); + } else if (iascl == 2) { + dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("U", &c__0, &c__0, &bignum, &anrm, rank, rank, &a[a_offset], + lda, info); + } + if (ibscl == 1) { + dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L70: + work[1] = (doublereal) lwkopt; + + return 0; + +/* End of DGELSY */ + +} /* dgelsy_ */ + diff --git a/lapack-netlib/SRC/dgemlq.c b/lapack-netlib/SRC/dgemlq.c new file mode 100644 index 000000000..3baf41bd0 --- /dev/null +++ b/lapack-netlib/SRC/dgemlq.c @@ -0,0 +1,684 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEMLQ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, */ +/* $ TSIZE, C, LDC, WORK, LWORK, INFO ) */ + + +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC */ +/* DOUBLE PRECISION A( LDA, * ), T( * ), C(LDC, * ), WORK( * ) */ + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEMLQ overwrites the general real M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q * C C * Q */ +/* > TRANS = 'T': Q**T * C C * Q**T */ +/* > where Q is a real orthogonal matrix defined as the product */ +/* > of blocked elementary reflectors computed by short wide LQ */ +/* > factorization (DGELQ) */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left; */ +/* > = 'R': apply Q or Q**T from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'T': Transpose, apply Q**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension */ +/* > (LDA,M) if SIDE = 'L', */ +/* > (LDA,N) if SIDE = 'R' */ +/* > Part of the data structure to represent Q as returned by DGELQ. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,K). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] T */ +/* > \verbatim */ +/* > T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)). */ +/* > Part of the data structure to represent Q as returned by DGELQ. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TSIZE */ +/* > \verbatim */ +/* > TSIZE is INTEGER */ +/* > The dimension of the array T. TSIZE >= 5. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If LWORK = -1, then a workspace query is assumed. The routine */ +/* > only calculates the size of the WORK array, returns this */ +/* > value as WORK(1), and no error message related to WORK */ +/* > is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \par Further Details */ +/* ==================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > These details are particular for this LAPACK implementation. Users should not */ +/* > take them for granted. These details may change in the future, and are not likely */ +/* > true for another LAPACK implementation. These details are relevant if one wants */ +/* > to try to understand the code. They are not part of the interface. */ +/* > */ +/* > In this version, */ +/* > */ +/* > T(2): row block size (MB) */ +/* > T(3): column block size (NB) */ +/* > T(6:TSIZE): data structure needed for Q, computed by */ +/* > DLASWLQ or DGELQT */ +/* > */ +/* > Depending on the matrix dimensions M and N, and row and column */ +/* > block sizes MB and NB returned by ILAENV, DGELQ will use either */ +/* > DLASWLQ (if the matrix is wide-and-short) or DGELQT to compute */ +/* > the LQ factorization. */ +/* > This version of DGEMLQ will use either DLAMSWLQ or DGEMLQT to */ +/* > multiply matrix Q by another matrix. */ +/* > Further Details in DLAMSWLQ or DGEMLQT. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgemlq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *t, integer * + tsize, doublereal *c__, integer *ldc, doublereal *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1; + + /* Local variables */ + logical left, tran; + extern /* Subroutine */ int dlamswlq_(char *, char *, integer *, integer * + , integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *); + extern logical lsame_(char *, char *); + logical right; + integer mb, nb, mn, lw, nblcks; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical notran, lquery; + extern /* Subroutine */ int dgemlqt_(char *, char *, integer *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --t; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + --work; + + /* Function Body */ + lquery = *lwork == -1; + notran = lsame_(trans, "N"); + tran = lsame_(trans, "T"); + left = lsame_(side, "L"); + right = lsame_(side, "R"); + + mb = (integer) t[2]; + nb = (integer) t[3]; + if (left) { + lw = *n * mb; + mn = *m; + } else { + lw = *m * mb; + mn = *n; + } + + if (nb > *k && mn > *k) { + if ((mn - *k) % (nb - *k) == 0) { + nblcks = (mn - *k) / (nb - *k); + } else { + nblcks = (mn - *k) / (nb - *k) + 1; + } + } else { + nblcks = 1; + } + + *info = 0; + if (! left && ! right) { + *info = -1; + } else if (! tran && ! notran) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > mn) { + *info = -5; + } else if (*lda < f2cmax(1,*k)) { + *info = -7; + } else if (*tsize < 5) { + *info = -9; + } else if (*ldc < f2cmax(1,*m)) { + *info = -11; + } else if (*lwork < f2cmax(1,lw) && ! lquery) { + *info = -13; + } + + if (*info == 0) { + work[1] = (doublereal) lw; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEMLQ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*k) == 0) { + return 0; + } + +/* Computing MAX */ + i__1 = f2cmax(*m,*n); + if (left && *m <= *k || right && *n <= *k || nb <= *k || nb >= f2cmax(i__1,* + k)) { + dgemlqt_(side, trans, m, n, k, &mb, &a[a_offset], lda, &t[6], &mb, & + c__[c_offset], ldc, &work[1], info); + } else { + dlamswlq_(side, trans, m, n, k, &mb, &nb, &a[a_offset], lda, &t[6], & + mb, &c__[c_offset], ldc, &work[1], lwork, info); + } + + work[1] = (doublereal) lw; + + return 0; + +/* End of DGEMLQ */ + +} /* dgemlq_ */ + diff --git a/lapack-netlib/SRC/dgemlqt.c b/lapack-netlib/SRC/dgemlqt.c new file mode 100644 index 000000000..b924652e8 --- /dev/null +++ b/lapack-netlib/SRC/dgemlqt.c @@ -0,0 +1,707 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEMLQT */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEMLQT + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, */ +/* C, LDC, WORK, INFO ) */ + +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, K, LDV, LDC, M, N, MB, LDT */ +/* DOUBLE PRECISION V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEMLQT overwrites the general real M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q C C Q */ +/* > TRANS = 'T': Q**T C C Q**T */ +/* > */ +/* > where Q is a real orthogonal matrix defined as the product of K */ +/* > elementary reflectors: */ +/* > */ +/* > Q = H(1) H(2) . . . H(K) = I - V T V**T */ +/* > */ +/* > generated using the compact WY representation as returned by DGELQT. */ +/* > */ +/* > Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left; */ +/* > = 'R': apply Q or Q**T from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'C': Transpose, apply Q**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MB */ +/* > \verbatim */ +/* > MB is INTEGER */ +/* > The block size used for the storage of T. K >= MB >= 1. */ +/* > This must be the same value of MB used to generate T */ +/* > in DGELQT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is DOUBLE PRECISION array, dimension */ +/* > (LDV,M) if SIDE = 'L', */ +/* > (LDV,N) if SIDE = 'R' */ +/* > The i-th row must contain the vector which defines the */ +/* > elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* > DGELQT in the first K rows of its array argument A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. LDV >= f2cmax(1,K). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] T */ +/* > \verbatim */ +/* > T is DOUBLE PRECISION array, dimension (LDT,K) */ +/* > The upper triangular factors of the block reflectors */ +/* > as returned by DGELQT, stored as a MB-by-K matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= MB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array. The dimension of */ +/* > WORK is N*MB if SIDE = 'L', or M*MB if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgemlqt_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *mb, doublereal *v, integer *ldv, doublereal *t, + integer *ldt, doublereal *c__, integer *ldc, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, c_dim1, c_offset, t_dim1, t_offset, i__1, i__2, + i__3, i__4; + + /* Local variables */ + logical left, tran; + integer i__; + extern logical lsame_(char *, char *); + logical right; + integer ib, kf; + extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), xerbla_(char *, + integer *, ftnlen); + logical notran; + integer ldwork; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + right = lsame_(side, "R"); + tran = lsame_(trans, "T"); + notran = lsame_(trans, "N"); + + if (left) { + ldwork = f2cmax(1,*n); + } else if (right) { + ldwork = f2cmax(1,*m); + } + if (! left && ! right) { + *info = -1; + } else if (! tran && ! notran) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0) { + *info = -5; + } else if (*mb < 1 || *mb > *k && *k > 0) { + *info = -6; + } else if (*ldv < f2cmax(1,*k)) { + *info = -8; + } else if (*ldt < *mb) { + *info = -10; + } else if (*ldc < f2cmax(1,*m)) { + *info = -12; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEMLQT", &i__1, (ftnlen)7); + return 0; + } + + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran) { + + i__1 = *k; + i__2 = *mb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = *mb, i__4 = *k - i__ + 1; + ib = f2cmin(i__3,i__4); + i__3 = *m - i__ + 1; + dlarfb_("L", "T", "F", "R", &i__3, n, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, + &work[1], &ldwork); + } + + } else if (right && tran) { + + i__2 = *k; + i__1 = *mb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = *mb, i__4 = *k - i__ + 1; + ib = f2cmin(i__3,i__4); + i__3 = *n - i__ + 1; + dlarfb_("R", "N", "F", "R", m, &i__3, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], + ldc, &work[1], &ldwork); + } + + } else if (left && tran) { + + kf = (*k - 1) / *mb * *mb + 1; + i__1 = -(*mb); + for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = *mb, i__3 = *k - i__ + 1; + ib = f2cmin(i__2,i__3); + i__2 = *m - i__ + 1; + dlarfb_("L", "N", "F", "R", &i__2, n, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, + &work[1], &ldwork); + } + + } else if (right && notran) { + + kf = (*k - 1) / *mb * *mb + 1; + i__1 = -(*mb); + for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = *mb, i__3 = *k - i__ + 1; + ib = f2cmin(i__2,i__3); + i__2 = *n - i__ + 1; + dlarfb_("R", "T", "F", "R", m, &i__2, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], + ldc, &work[1], &ldwork); + } + + } + + return 0; + +/* End of DGEMLQT */ + +} /* dgemlqt_ */ + diff --git a/lapack-netlib/SRC/dgemqr.c b/lapack-netlib/SRC/dgemqr.c new file mode 100644 index 000000000..43d9286ad --- /dev/null +++ b/lapack-netlib/SRC/dgemqr.c @@ -0,0 +1,685 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEMQR */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, */ +/* $ TSIZE, C, LDC, WORK, LWORK, INFO ) */ + + +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC */ +/* DOUBLE PRECISION A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) */ + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEMQR overwrites the general real M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q * C C * Q */ +/* > TRANS = 'T': Q**T * C C * Q**T */ +/* > */ +/* > where Q is a real orthogonal matrix defined as the product */ +/* > of blocked elementary reflectors computed by tall skinny */ +/* > QR factorization (DGEQR) */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left; */ +/* > = 'R': apply Q or Q**T from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'T': Transpose, apply Q**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,K) */ +/* > Part of the data structure to represent Q as returned by DGEQR. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > If SIDE = 'L', LDA >= f2cmax(1,M); */ +/* > if SIDE = 'R', LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] T */ +/* > \verbatim */ +/* > T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)). */ +/* > Part of the data structure to represent Q as returned by DGEQR. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TSIZE */ +/* > \verbatim */ +/* > TSIZE is INTEGER */ +/* > The dimension of the array T. TSIZE >= 5. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If LWORK = -1, then a workspace query is assumed. The routine */ +/* > only calculates the size of the WORK array, returns this */ +/* > value as WORK(1), and no error message related to WORK */ +/* > is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \par Further Details */ +/* ==================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > These details are particular for this LAPACK implementation. Users should not */ +/* > take them for granted. These details may change in the future, and are not likely */ +/* > true for another LAPACK implementation. These details are relevant if one wants */ +/* > to try to understand the code. They are not part of the interface. */ +/* > */ +/* > In this version, */ +/* > */ +/* > T(2): row block size (MB) */ +/* > T(3): column block size (NB) */ +/* > T(6:TSIZE): data structure needed for Q, computed by */ +/* > DLATSQR or DGEQRT */ +/* > */ +/* > Depending on the matrix dimensions M and N, and row and column */ +/* > block sizes MB and NB returned by ILAENV, DGEQR will use either */ +/* > DLATSQR (if the matrix is tall-and-skinny) or DGEQRT to compute */ +/* > the QR factorization. */ +/* > This version of DGEMQR will use either DLAMTSQR or DGEMQRT to */ +/* > multiply matrix Q by another matrix. */ +/* > Further Details in DLATMSQR or DGEMQRT. */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgemqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *t, integer * + tsize, doublereal *c__, integer *ldc, doublereal *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1; + + /* Local variables */ + logical left, tran; + extern /* Subroutine */ int dlamtsqr_(char *, char *, integer *, integer * + , integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *); + extern logical lsame_(char *, char *); + logical right; + integer mb, nb, mn, lw, nblcks; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical notran, lquery; + extern /* Subroutine */ int dgemqrt_(char *, char *, integer *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --t; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + --work; + + /* Function Body */ + lquery = *lwork == -1; + notran = lsame_(trans, "N"); + tran = lsame_(trans, "T"); + left = lsame_(side, "L"); + right = lsame_(side, "R"); + + mb = (integer) t[2]; + nb = (integer) t[3]; + if (left) { + lw = *n * nb; + mn = *m; + } else { + lw = mb * nb; + mn = *n; + } + + if (mb > *k && mn > *k) { + if ((mn - *k) % (mb - *k) == 0) { + nblcks = (mn - *k) / (mb - *k); + } else { + nblcks = (mn - *k) / (mb - *k) + 1; + } + } else { + nblcks = 1; + } + + *info = 0; + if (! left && ! right) { + *info = -1; + } else if (! tran && ! notran) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > mn) { + *info = -5; + } else if (*lda < f2cmax(1,mn)) { + *info = -7; + } else if (*tsize < 5) { + *info = -9; + } else if (*ldc < f2cmax(1,*m)) { + *info = -11; + } else if (*lwork < f2cmax(1,lw) && ! lquery) { + *info = -13; + } + + if (*info == 0) { + work[1] = (doublereal) lw; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEMQR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*k) == 0) { + return 0; + } + +/* Computing MAX */ + i__1 = f2cmax(*m,*n); + if (left && *m <= *k || right && *n <= *k || mb <= *k || mb >= f2cmax(i__1,* + k)) { + dgemqrt_(side, trans, m, n, k, &nb, &a[a_offset], lda, &t[6], &nb, & + c__[c_offset], ldc, &work[1], info); + } else { + dlamtsqr_(side, trans, m, n, k, &mb, &nb, &a[a_offset], lda, &t[6], & + nb, &c__[c_offset], ldc, &work[1], lwork, info); + } + + work[1] = (doublereal) lw; + + return 0; + +/* End of DGEMQR */ + +} /* dgemqr_ */ + diff --git a/lapack-netlib/SRC/dgemqrt.c b/lapack-netlib/SRC/dgemqrt.c new file mode 100644 index 000000000..906dfbd68 --- /dev/null +++ b/lapack-netlib/SRC/dgemqrt.c @@ -0,0 +1,708 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEMQRT */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEMQRT + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEMQRT( SIDE, TRANS, M, N, K, NB, V, LDV, T, LDT, */ +/* C, LDC, WORK, INFO ) */ + +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, K, LDV, LDC, M, N, NB, LDT */ +/* DOUBLE PRECISION V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEMQRT overwrites the general real M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q C C Q */ +/* > TRANS = 'T': Q**T C C Q**T */ +/* > */ +/* > where Q is a real orthogonal matrix defined as the product of K */ +/* > elementary reflectors: */ +/* > */ +/* > Q = H(1) H(2) . . . H(K) = I - V T V**T */ +/* > */ +/* > generated using the compact WY representation as returned by DGEQRT. */ +/* > */ +/* > Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left; */ +/* > = 'R': apply Q or Q**T from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'C': Transpose, apply Q**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NB */ +/* > \verbatim */ +/* > NB is INTEGER */ +/* > The block size used for the storage of T. K >= NB >= 1. */ +/* > This must be the same value of NB used to generate T */ +/* > in CGEQRT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is DOUBLE PRECISION array, dimension (LDV,K) */ +/* > The i-th column must contain the vector which defines the */ +/* > elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* > CGEQRT in the first K columns of its array argument A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. */ +/* > If SIDE = 'L', LDA >= f2cmax(1,M); */ +/* > if SIDE = 'R', LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] T */ +/* > \verbatim */ +/* > T is DOUBLE PRECISION array, dimension (LDT,K) */ +/* > The upper triangular factors of the block reflectors */ +/* > as returned by CGEQRT, stored as a NB-by-N matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= NB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array. The dimension of */ +/* > WORK is N*NB if SIDE = 'L', or M*NB if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgemqrt_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *nb, doublereal *v, integer *ldv, doublereal *t, + integer *ldt, doublereal *c__, integer *ldc, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, c_dim1, c_offset, t_dim1, t_offset, i__1, i__2, + i__3, i__4; + + /* Local variables */ + logical left, tran; + integer i__, q; + extern logical lsame_(char *, char *); + logical right; + integer ib, kf; + extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), xerbla_(char *, integer *, ftnlen); + logical notran; + integer ldwork; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + right = lsame_(side, "R"); + tran = lsame_(trans, "T"); + notran = lsame_(trans, "N"); + + if (left) { + ldwork = f2cmax(1,*n); + q = *m; + } else if (right) { + ldwork = f2cmax(1,*m); + q = *n; + } + if (! left && ! right) { + *info = -1; + } else if (! tran && ! notran) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > q) { + *info = -5; + } else if (*nb < 1 || *nb > *k && *k > 0) { + *info = -6; + } else if (*ldv < f2cmax(1,q)) { + *info = -8; + } else if (*ldt < *nb) { + *info = -10; + } else if (*ldc < f2cmax(1,*m)) { + *info = -12; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEMQRT", &i__1, (ftnlen)7); + return 0; + } + + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && tran) { + + i__1 = *k; + i__2 = *nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = *nb, i__4 = *k - i__ + 1; + ib = f2cmin(i__3,i__4); + i__3 = *m - i__ + 1; + dlarfb_("L", "T", "F", "C", &i__3, n, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, + &work[1], &ldwork); + } + + } else if (right && notran) { + + i__2 = *k; + i__1 = *nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = *nb, i__4 = *k - i__ + 1; + ib = f2cmin(i__3,i__4); + i__3 = *n - i__ + 1; + dlarfb_("R", "N", "F", "C", m, &i__3, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], + ldc, &work[1], &ldwork); + } + + } else if (left && notran) { + + kf = (*k - 1) / *nb * *nb + 1; + i__1 = -(*nb); + for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = *nb, i__3 = *k - i__ + 1; + ib = f2cmin(i__2,i__3); + i__2 = *m - i__ + 1; + dlarfb_("L", "N", "F", "C", &i__2, n, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, + &work[1], &ldwork); + } + + } else if (right && tran) { + + kf = (*k - 1) / *nb * *nb + 1; + i__1 = -(*nb); + for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = *nb, i__3 = *k - i__ + 1; + ib = f2cmin(i__2,i__3); + i__2 = *n - i__ + 1; + dlarfb_("R", "T", "F", "C", m, &i__2, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], + ldc, &work[1], &ldwork); + } + + } + + return 0; + +/* End of DGEMQRT */ + +} /* dgemqrt_ */ + diff --git a/lapack-netlib/SRC/dgeql2.c b/lapack-netlib/SRC/dgeql2.c new file mode 100644 index 000000000..040cc1916 --- /dev/null +++ b/lapack-netlib/SRC/dgeql2.c @@ -0,0 +1,591 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEQL2 computes the QL factorization of a general rectangular matrix using an unblocked algorit +hm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEQL2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEQL2( M, N, A, LDA, TAU, WORK, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEQL2 computes a QL factorization of a real m by n matrix A: */ +/* > A = Q * L. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the m by n matrix A. */ +/* > On exit, if m >= n, the lower triangle of the subarray */ +/* > A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; */ +/* > if m <= n, the elements on and below the (n-m)-th */ +/* > superdiagonal contain the m by n lower trapezoidal matrix L; */ +/* > the remaining elements, with the array TAU, represent the */ +/* > orthogonal matrix Q as a product of elementary reflectors */ +/* > (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */ +/* > A(1:m-k+i-1,n-k+i), and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgeql2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, k; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen); + doublereal aii; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQL2", &i__1, (ftnlen)6); + return 0; + } + + k = f2cmin(*m,*n); + + for (i__ = k; i__ >= 1; --i__) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(1:m-k+i-1,n-k+i) */ + + i__1 = *m - k + i__; + dlarfg_(&i__1, &a[*m - k + i__ + (*n - k + i__) * a_dim1], &a[(*n - k + + i__) * a_dim1 + 1], &c__1, &tau[i__]); + +/* Apply H(i) to A(1:m-k+i,1:n-k+i-1) from the left */ + + aii = a[*m - k + i__ + (*n - k + i__) * a_dim1]; + a[*m - k + i__ + (*n - k + i__) * a_dim1] = 1.; + i__1 = *m - k + i__; + i__2 = *n - k + i__ - 1; + dlarf_("Left", &i__1, &i__2, &a[(*n - k + i__) * a_dim1 + 1], &c__1, & + tau[i__], &a[a_offset], lda, &work[1]); + a[*m - k + i__ + (*n - k + i__) * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DGEQL2 */ + +} /* dgeql2_ */ + diff --git a/lapack-netlib/SRC/dgeqlf.c b/lapack-netlib/SRC/dgeqlf.c new file mode 100644 index 000000000..aa01ef1c0 --- /dev/null +++ b/lapack-netlib/SRC/dgeqlf.c @@ -0,0 +1,711 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEQLF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEQLF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEQLF computes a QL factorization of a real M-by-N matrix A: */ +/* > A = Q * L. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if m >= n, the lower triangle of the subarray */ +/* > A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; */ +/* > if m <= n, the elements on and below the (n-m)-th */ +/* > superdiagonal contain the M-by-N lower trapezoidal matrix L; */ +/* > the remaining elements, with the array TAU, represent the */ +/* > orthogonal matrix Q as a product of elementary reflectors */ +/* > (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N). */ +/* > For optimum performance LWORK >= N*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */ +/* > A(1:m-k+i-1,n-k+i), and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgeqlf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, nbmin, iinfo; + extern /* Subroutine */ int dgeql2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer ib, nb, ki, kk; + extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + integer mu, nu, nx; + extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + integer iws; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + + if (*info == 0) { + k = f2cmin(*m,*n); + if (k == 0) { + lwkopt = 1; + } else { + nb = ilaenv_(&c__1, "DGEQLF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < f2cmax(1,*n) && ! lquery) { + *info = -7; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQLF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (k == 0) { + return 0; + } + + nbmin = 2; + nx = 1; + iws = *n; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQLF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQLF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially. */ +/* The last kk columns are handled by the block method. */ + + ki = (k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = k, i__2 = ki + nb; + kk = f2cmin(i__1,i__2); + + i__1 = k - kk + 1; + i__2 = -nb; + for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ + += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,nb); + +/* Compute the QL factorization of the current block */ +/* A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) */ + + i__3 = *m - k + i__ + ib - 1; + dgeql2_(&i__3, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, &tau[ + i__], &work[1], &iinfo); + if (*n - k + i__ > 1) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . H(i+1) H(i) */ + + i__3 = *m - k + i__ + ib - 1; + dlarft_("Backward", "Columnwise", &i__3, &ib, &a[(*n - k + + i__) * a_dim1 + 1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H**T to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */ + + i__3 = *m - k + i__ + ib - 1; + i__4 = *n - k + i__ - 1; + dlarfb_("Left", "Transpose", "Backward", "Columnwise", &i__3, + &i__4, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, & + work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], & + ldwork); + } +/* L10: */ + } + mu = *m - k + i__ + nb - 1; + nu = *n - k + i__ + nb - 1; + } else { + mu = *m; + nu = *n; + } + +/* Use unblocked code to factor the last or only block */ + + if (mu > 0 && nu > 0) { + dgeql2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGEQLF */ + +} /* dgeqlf_ */ + diff --git a/lapack-netlib/SRC/dgeqp3.c b/lapack-netlib/SRC/dgeqp3.c new file mode 100644 index 000000000..98279844e --- /dev/null +++ b/lapack-netlib/SRC/dgeqp3.c @@ -0,0 +1,795 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEQP3 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEQP3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* INTEGER JPVT( * ) */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEQP3 computes a QR factorization with column pivoting of a */ +/* > matrix A: A*P = Q*R using Level 3 BLAS. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the upper triangle of the array contains the */ +/* > f2cmin(M,N)-by-N upper trapezoidal matrix R; the elements below */ +/* > the diagonal, together with the array TAU, represent the */ +/* > orthogonal matrix Q as a product of f2cmin(M,N) elementary */ +/* > reflectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] JPVT */ +/* > \verbatim */ +/* > JPVT is INTEGER array, dimension (N) */ +/* > On entry, if JPVT(J).ne.0, the J-th column of A is permuted */ +/* > to the front of A*P (a leading column); if JPVT(J)=0, */ +/* > the J-th column of A is a free column. */ +/* > On exit, if JPVT(J)=K, then the J-th column of A*P was the */ +/* > the K-th column of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO=0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= 3*N+1. */ +/* > For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB */ +/* > is the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real/complex vector */ +/* > with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in */ +/* > A(i+1:m,i), and tau in TAU(i). */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */ +/* > X. Sun, Computer Science Dept., Duke University, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgeqp3_(integer *m, integer *n, doublereal *a, integer * + lda, integer *jpvt, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer nfxd; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + integer j, nbmin, minmn; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer minws; + extern /* Subroutine */ int dlaqp2_(integer *, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *); + integer jb, na, nb, sm, sn, nx; + extern /* Subroutine */ int dgeqrf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dlaqps_(integer *, integer *, integer *, + integer *, integer *, doublereal *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + integer topbmn, sminmn; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + integer fjb, iws; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test input arguments */ +/* ==================== */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --jpvt; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + + if (*info == 0) { + minmn = f2cmin(*m,*n); + if (minmn == 0) { + iws = 1; + lwkopt = 1; + } else { + iws = *n * 3 + 1; + nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = (*n << 1) + (*n + 1) * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < iws && ! lquery) { + *info = -8; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQP3", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Move initial columns up front. */ + + nfxd = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (jpvt[j] != 0) { + if (j != nfxd) { + dswap_(m, &a[j * a_dim1 + 1], &c__1, &a[nfxd * a_dim1 + 1], & + c__1); + jpvt[j] = jpvt[nfxd]; + jpvt[nfxd] = j; + } else { + jpvt[j] = j; + } + ++nfxd; + } else { + jpvt[j] = j; + } +/* L10: */ + } + --nfxd; + +/* Factorize fixed columns */ +/* ======================= */ + +/* Compute the QR factorization of fixed columns and update */ +/* remaining columns. */ + + if (nfxd > 0) { + na = f2cmin(*m,nfxd); +/* CC CALL DGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) */ + dgeqrf_(m, &na, &a[a_offset], lda, &tau[1], &work[1], lwork, info); +/* Computing MAX */ + i__1 = iws, i__2 = (integer) work[1]; + iws = f2cmax(i__1,i__2); + if (na < *n) { +/* CC CALL DORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA, */ +/* CC $ TAU, A( 1, NA+1 ), LDA, WORK, INFO ) */ + i__1 = *n - na; + dormqr_("Left", "Transpose", m, &i__1, &na, &a[a_offset], lda, & + tau[1], &a[(na + 1) * a_dim1 + 1], lda, &work[1], lwork, + info); +/* Computing MAX */ + i__1 = iws, i__2 = (integer) work[1]; + iws = f2cmax(i__1,i__2); + } + } + +/* Factorize free columns */ +/* ====================== */ + + if (nfxd < minmn) { + + sm = *m - nfxd; + sn = *n - nfxd; + sminmn = minmn - nfxd; + +/* Determine the block size. */ + + nb = ilaenv_(&c__1, "DGEQRF", " ", &sm, &sn, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nbmin = 2; + nx = 0; + + if (nb > 1 && nb < sminmn) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", &sm, &sn, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + + + if (nx < sminmn) { + +/* Determine if workspace is large enough for blocked code. */ + + minws = (sn << 1) + (sn + 1) * nb; + iws = f2cmax(iws,minws); + if (*lwork < minws) { + +/* Not enough workspace to use optimal NB: Reduce NB and */ +/* determine the minimum value of NB. */ + + nb = (*lwork - (sn << 1)) / (sn + 1); +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", &sm, &sn, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + + + } + } + } + +/* Initialize partial column norms. The first N elements of work */ +/* store the exact column norms. */ + + i__1 = *n; + for (j = nfxd + 1; j <= i__1; ++j) { + work[j] = dnrm2_(&sm, &a[nfxd + 1 + j * a_dim1], &c__1); + work[*n + j] = work[j]; +/* L20: */ + } + + if (nb >= nbmin && nb < sminmn && nx < sminmn) { + +/* Use blocked code initially. */ + + j = nfxd + 1; + +/* Compute factorization: while loop. */ + + + topbmn = minmn - nx; +L30: + if (j <= topbmn) { +/* Computing MIN */ + i__1 = nb, i__2 = topbmn - j + 1; + jb = f2cmin(i__1,i__2); + +/* Factorize JB columns among columns J:N. */ + + i__1 = *n - j + 1; + i__2 = j - 1; + i__3 = *n - j + 1; + dlaqps_(m, &i__1, &i__2, &jb, &fjb, &a[j * a_dim1 + 1], lda, & + jpvt[j], &tau[j], &work[j], &work[*n + j], &work[(*n + << 1) + 1], &work[(*n << 1) + jb + 1], &i__3); + + j += fjb; + goto L30; + } + } else { + j = nfxd + 1; + } + +/* Use unblocked code to factor the last or only block. */ + + + if (j <= minmn) { + i__1 = *n - j + 1; + i__2 = j - 1; + dlaqp2_(m, &i__1, &i__2, &a[j * a_dim1 + 1], lda, &jpvt[j], &tau[ + j], &work[j], &work[*n + j], &work[(*n << 1) + 1]); + } + + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGEQP3 */ + +} /* dgeqp3_ */ + diff --git a/lapack-netlib/SRC/dgeqr.c b/lapack-netlib/SRC/dgeqr.c new file mode 100644 index 000000000..f47dd1844 --- /dev/null +++ b/lapack-netlib/SRC/dgeqr.c @@ -0,0 +1,735 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEQR */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, M, N, TSIZE, LWORK */ +/* DOUBLE PRECISION A( LDA, * ), T( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEQR computes a QR factorization of a real M-by-N matrix A: */ +/* > */ +/* > A = Q * ( R ), */ +/* > ( 0 ) */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a M-by-M orthogonal matrix; */ +/* > R is an upper-triangular N-by-N matrix; */ +/* > 0 is a (M-N)-by-N zero matrix, if M > N. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(M,N)-by-N upper trapezoidal matrix R */ +/* > (R is upper triangular if M >= N); */ +/* > the elements below the diagonal are used to store part of the */ +/* > data structure to represent Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)) */ +/* > On exit, if INFO = 0, T(1) returns optimal (or either minimal */ +/* > or optimal, if query is assumed) TSIZE. See TSIZE for details. */ +/* > Remaining T contains part of the data structure used to represent Q. */ +/* > If one wants to apply or construct Q, then one needs to keep T */ +/* > (in addition to A) and pass it to further subroutines. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TSIZE */ +/* > \verbatim */ +/* > TSIZE is INTEGER */ +/* > If TSIZE >= 5, the dimension of the array T. */ +/* > If TSIZE = -1 or -2, then a workspace query is assumed. The routine */ +/* > only calculates the sizes of the T and WORK arrays, returns these */ +/* > values as the first entries of the T and WORK arrays, and no error */ +/* > message related to T or WORK is issued by XERBLA. */ +/* > If TSIZE = -1, the routine calculates optimal size of T for the */ +/* > optimum performance and returns this value in T(1). */ +/* > If TSIZE = -2, the routine calculates minimal size of T and */ +/* > returns this value in T(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */ +/* > or optimal, if query was assumed) LWORK. */ +/* > See LWORK for details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If LWORK = -1 or -2, then a workspace query is assumed. The routine */ +/* > only calculates the sizes of the T and WORK arrays, returns these */ +/* > values as the first entries of the T and WORK arrays, and no error */ +/* > message related to T or WORK is issued by XERBLA. */ +/* > If LWORK = -1, the routine calculates optimal size of WORK for the */ +/* > optimal performance and returns this value in WORK(1). */ +/* > If LWORK = -2, the routine calculates minimal size of WORK and */ +/* > returns this value in WORK(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \par Further Details */ +/* ==================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The goal of the interface is to give maximum freedom to the developers for */ +/* > creating any QR factorization algorithm they wish. The triangular */ +/* > (trapezoidal) R has to be stored in the upper part of A. The lower part of A */ +/* > and the array T can be used to store any relevant information for applying or */ +/* > constructing the Q factor. The WORK array can safely be discarded after exit. */ +/* > */ +/* > Caution: One should not expect the sizes of T and WORK to be the same from one */ +/* > LAPACK implementation to the other, or even from one execution to the other. */ +/* > A workspace query (for T and WORK) is needed at each execution. However, */ +/* > for a given execution, the size of T and WORK are fixed and will not change */ +/* > from one query to the next. */ +/* > */ +/* > \endverbatim */ +/* > */ +/* > \par Further Details particular to this LAPACK implementation: */ +/* ============================================================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > These details are particular for this LAPACK implementation. Users should not */ +/* > take them for granted. These details may change in the future, and are not likely */ +/* > true for another LAPACK implementation. These details are relevant if one wants */ +/* > to try to understand the code. They are not part of the interface. */ +/* > */ +/* > In this version, */ +/* > */ +/* > T(2): row block size (MB) */ +/* > T(3): column block size (NB) */ +/* > T(6:TSIZE): data structure needed for Q, computed by */ +/* > DLATSQR or DGEQRT */ +/* > */ +/* > Depending on the matrix dimensions M and N, and row and column */ +/* > block sizes MB and NB returned by ILAENV, DGEQR will use either */ +/* > DLATSQR (if the matrix is tall-and-skinny) or DGEQRT to compute */ +/* > the QR factorization. */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgeqr_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *t, integer *tsize, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + logical mint, minw; + integer mb, nb, nblcks; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dgeqrt_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + logical lminws, lquery; + integer mintsz; + extern /* Subroutine */ int dlatsqr_(integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --t; + --work; + + /* Function Body */ + *info = 0; + + lquery = *tsize == -1 || *tsize == -2 || *lwork == -1 || *lwork == -2; + + mint = FALSE_; + minw = FALSE_; + if (*tsize == -2 || *lwork == -2) { + if (*tsize != -1) { + mint = TRUE_; + } + if (*lwork != -1) { + minw = TRUE_; + } + } + +/* Determine the block size */ + + if (f2cmin(*m,*n) > 0) { + mb = ilaenv_(&c__1, "DGEQR ", " ", m, n, &c__1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb = ilaenv_(&c__1, "DGEQR ", " ", m, n, &c__2, &c_n1, (ftnlen)6, ( + ftnlen)1); + } else { + mb = *m; + nb = 1; + } + if (mb > *m || mb <= *n) { + mb = *m; + } + if (nb > f2cmin(*m,*n) || nb < 1) { + nb = 1; + } + mintsz = *n + 5; + if (mb > *n && *m > *n) { + if ((*m - *n) % (mb - *n) == 0) { + nblcks = (*m - *n) / (mb - *n); + } else { + nblcks = (*m - *n) / (mb - *n) + 1; + } + } else { + nblcks = 1; + } + +/* Determine if the workspace size satisfies minimal size */ + + lminws = FALSE_; +/* Computing MAX */ + i__1 = 1, i__2 = nb * *n * nblcks + 5; + if ((*tsize < f2cmax(i__1,i__2) || *lwork < nb * *n) && *lwork >= *n && * + tsize >= mintsz && ! lquery) { +/* Computing MAX */ + i__1 = 1, i__2 = nb * *n * nblcks + 5; + if (*tsize < f2cmax(i__1,i__2)) { + lminws = TRUE_; + nb = 1; + mb = *m; + } + if (*lwork < nb * *n) { + lminws = TRUE_; + nb = 1; + } + } + + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = nb * *n * nblcks + 5; + if (*tsize < f2cmax(i__1,i__2) && ! lquery && ! lminws) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = *n * nb; + if (*lwork < f2cmax(i__1,i__2) && ! lquery && ! lminws) { + *info = -8; + } + } + } + + if (*info == 0) { + if (mint) { + t[1] = (doublereal) mintsz; + } else { + t[1] = (doublereal) (nb * *n * nblcks + 5); + } + t[2] = (doublereal) mb; + t[3] = (doublereal) nb; + if (minw) { + work[1] = (doublereal) f2cmax(1,*n); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = nb * *n; + work[1] = (doublereal) f2cmax(i__1,i__2); + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQR", &i__1, (ftnlen)5); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (f2cmin(*m,*n) == 0) { + return 0; + } + +/* The QR Decomposition */ + + if (*m <= *n || mb <= *n || mb >= *m) { + dgeqrt_(m, n, &nb, &a[a_offset], lda, &t[6], &nb, &work[1], info); + } else { + dlatsqr_(m, n, &mb, &nb, &a[a_offset], lda, &t[6], &nb, &work[1], + lwork, info); + } + +/* Computing MAX */ + i__1 = 1, i__2 = nb * *n; + work[1] = (doublereal) f2cmax(i__1,i__2); + + return 0; + +/* End of DGEQR */ + +} /* dgeqr_ */ + diff --git a/lapack-netlib/SRC/dgeqr2.c b/lapack-netlib/SRC/dgeqr2.c new file mode 100644 index 000000000..e5ed2b269 --- /dev/null +++ b/lapack-netlib/SRC/dgeqr2.c @@ -0,0 +1,602 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorit +hm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEQR2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEQR2 computes a QR factorization of a real m-by-n matrix A: */ +/* > */ +/* > A = Q * ( R ), */ +/* > ( 0 ) */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a m-by-m orthogonal matrix; */ +/* > R is an upper-triangular n-by-n matrix; */ +/* > 0 is a (m-n)-by-n zero matrix, if m > n. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the m by n matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(m,n) by n upper trapezoidal matrix R (R is */ +/* > upper triangular if m >= n); the elements below the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of elementary reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2019 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* > and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, k; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen); + doublereal aii; + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQR2", &i__1, (ftnlen)6); + return 0; + } + + k = f2cmin(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * a_dim1] + , &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of DGEQR2 */ + +} /* dgeqr2_ */ + diff --git a/lapack-netlib/SRC/dgeqr2p.c b/lapack-netlib/SRC/dgeqr2p.c new file mode 100644 index 000000000..1bd3a5c8c --- /dev/null +++ b/lapack-netlib/SRC/dgeqr2p.c @@ -0,0 +1,607 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEQR2P computes the QR factorization of a general rectangular matrix with non-negative diagona +l elements using an unblocked algorithm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEQR2P + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEQR2P computes a QR factorization of a real m-by-n matrix A: */ +/* > */ +/* > A = Q * ( R ), */ +/* > ( 0 ) */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a m-by-m orthogonal matrix; */ +/* > R is an upper-triangular n-by-n matrix with nonnegative diagonal */ +/* > entries; */ +/* > 0 is a (m-n)-by-n zero matrix, if m > n. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the m by n matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(m,n) by n upper trapezoidal matrix R (R is */ +/* > upper triangular if m >= n). The diagonal entries of R are */ +/* > nonnegative; the elements below the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of elementary reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2019 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* > and tau in TAU(i). */ +/* > */ +/* > See Lapack Working Note 203 for details */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgeqr2p_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, k; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), xerbla_(char *, integer *, ftnlen); + doublereal aii; + extern /* Subroutine */ int dlarfgp_(integer *, doublereal *, doublereal * + , integer *, doublereal *); + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQR2P", &i__1, (ftnlen)7); + return 0; + } + + k = f2cmin(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfgp_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * + a_dim1], &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of DGEQR2P */ + +} /* dgeqr2p_ */ + diff --git a/lapack-netlib/SRC/dgeqrf.c b/lapack-netlib/SRC/dgeqrf.c new file mode 100644 index 000000000..84ab43763 --- /dev/null +++ b/lapack-netlib/SRC/dgeqrf.c @@ -0,0 +1,702 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEQRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEQRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEQRF computes a QR factorization of a real M-by-N matrix A: */ +/* > */ +/* > A = Q * ( R ), */ +/* > ( 0 ) */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a M-by-M orthogonal matrix; */ +/* > R is an upper-triangular N-by-N matrix; */ +/* > 0 is a (M-N)-by-N zero matrix, if M > N. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */ +/* > upper triangular if m >= n); the elements below the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of f2cmin(m,n) elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N). */ +/* > For optimum performance LWORK >= N*NB, where NB is */ +/* > the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2019 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* > and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgeqrf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, nbmin, iinfo; + extern /* Subroutine */ int dgeqr2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer ib, nb; + extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + integer nx; + extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + integer iws; + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*lwork < f2cmax(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = f2cmin(*m,*n); + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,nb); + +/* Compute the QR factorization of the current block */ +/* A(i:m,i:i+ib-1) */ + + i__3 = *m - i__ + 1; + dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__3 = *m - i__ + 1; + dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H**T to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGEQRF */ + +} /* dgeqrf_ */ + diff --git a/lapack-netlib/SRC/dgeqrfp.c b/lapack-netlib/SRC/dgeqrfp.c new file mode 100644 index 000000000..9cf4ef1ee --- /dev/null +++ b/lapack-netlib/SRC/dgeqrfp.c @@ -0,0 +1,705 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEQRFP */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEQRFP + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEQR2P computes a QR factorization of a real M-by-N matrix A: */ +/* > */ +/* > A = Q * ( R ), */ +/* > ( 0 ) */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a M-by-M orthogonal matrix; */ +/* > R is an upper-triangular N-by-N matrix with nonnegative diagonal */ +/* > entries; */ +/* > 0 is a (M-N)-by-N zero matrix, if M > N. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */ +/* > upper triangular if m >= n). The diagonal entries of R */ +/* > are nonnegative; the elements below the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of f2cmin(m,n) elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N). */ +/* > For optimum performance LWORK >= N*NB, where NB is */ +/* > the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2019 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* > and tau in TAU(i). */ +/* > */ +/* > See Lapack Working Note 203 for details */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgeqrfp_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, nbmin, iinfo, ib, nb; + extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + integer nx; + extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + extern /* Subroutine */ int dgeqr2p_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer iws; + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*lwork < f2cmax(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQRFP", &i__1, (ftnlen)7); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = f2cmin(*m,*n); + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,nb); + +/* Compute the QR factorization of the current block */ +/* A(i:m,i:i+ib-1) */ + + i__3 = *m - i__ + 1; + dgeqr2p_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + if (i__ + ib <= *n) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__3 = *m - i__ + 1; + dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H**T to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgeqr2p_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGEQRFP */ + +} /* dgeqrfp_ */ + diff --git a/lapack-netlib/SRC/dgeqrt.c b/lapack-netlib/SRC/dgeqrt.c new file mode 100644 index 000000000..5568853eb --- /dev/null +++ b/lapack-netlib/SRC/dgeqrt.c @@ -0,0 +1,631 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEQRT */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEQRT + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) */ + +/* INTEGER INFO, LDA, LDT, M, N, NB */ +/* DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEQRT computes a blocked QR factorization of a real M-by-N matrix A */ +/* > using the compact WY representation of Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NB */ +/* > \verbatim */ +/* > NB is INTEGER */ +/* > The block size to be used in the blocked QR. MIN(M,N) >= NB >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */ +/* > upper triangular if M >= N); the elements below the diagonal */ +/* > are the columns of V. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is DOUBLE PRECISION array, dimension (LDT,MIN(M,N)) */ +/* > The upper triangular block reflectors stored in compact form */ +/* > as a sequence of upper triangular blocks. See below */ +/* > for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= NB. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (NB*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix V stores the elementary reflectors H(i) in the i-th column */ +/* > below the diagonal. For example, if M=5 and N=3, the matrix V is */ +/* > */ +/* > V = ( 1 ) */ +/* > ( v1 1 ) */ +/* > ( v1 v2 1 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > where the vi's represent the vectors which define H(i), which are returned */ +/* > in the matrix A. The 1's along the diagonal of V are not stored in A. */ +/* > */ +/* > Let K=MIN(M,N). The number of blocks is B = ceiling(K/NB), where each */ +/* > block is of order NB except for the last block, which is of order */ +/* > IB = K - (B-1)*NB. For each of the B blocks, a upper triangular block */ +/* > reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB */ +/* > for the last block) T's are stored in the NB-by-K matrix T as */ +/* > */ +/* > T = (T1 T2 ... TB). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgeqrt_(integer *m, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *t, integer *ldt, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + integer i__, k, iinfo, ib; + extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), xerbla_(char *, + integer *, ftnlen), dgeqrt2_(integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), dgeqrt3_(integer * + , integer *, doublereal *, integer *, doublereal *, integer *, + integer *); + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nb < 1 || *nb > f2cmin(*m,*n) && f2cmin(*m,*n) > 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldt < *nb) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQRT", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + k = f2cmin(*m,*n); + if (k == 0) { + return 0; + } + +/* Blocked loop of length K */ + + i__1 = k; + i__2 = *nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,*nb); + +/* Compute the QR factorization of the current block A(I:M,I:I+IB-1) */ + + if (TRUE_) { + i__3 = *m - i__ + 1; + dgeqrt3_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &t[i__ * t_dim1 + + 1], ldt, &iinfo); + } else { + i__3 = *m - i__ + 1; + dgeqrt2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &t[i__ * t_dim1 + + 1], ldt, &iinfo); + } + if (i__ + ib <= *n) { + +/* Update by applying H**T to A(I:M,I+IB:N) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + i__5 = *n - i__ - ib + 1; + dlarfb_("L", "T", "F", "C", &i__3, &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &t[i__ * t_dim1 + 1], ldt, &a[i__ + (i__ + + ib) * a_dim1], lda, &work[1], &i__5); + } + } + return 0; + +/* End of DGEQRT */ + +} /* dgeqrt_ */ + diff --git a/lapack-netlib/SRC/dgeqrt2.c b/lapack-netlib/SRC/dgeqrt2.c new file mode 100644 index 000000000..8165f9bf4 --- /dev/null +++ b/lapack-netlib/SRC/dgeqrt2.c @@ -0,0 +1,648 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEQRT2 computes a QR factorization of a general real or complex matrix using the compact WY re +presentation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEQRT2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEQRT2( M, N, A, LDA, T, LDT, INFO ) */ + +/* INTEGER INFO, LDA, LDT, M, N */ +/* DOUBLE PRECISION A( LDA, * ), T( LDT, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEQRT2 computes a QR factorization of a real M-by-N matrix A, */ +/* > using the compact WY representation of Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the real M-by-N matrix A. On exit, the elements on and */ +/* > above the diagonal contain the N-by-N upper triangular matrix R; the */ +/* > elements below the diagonal are the columns of V. See below for */ +/* > further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is DOUBLE PRECISION array, dimension (LDT,N) */ +/* > The N-by-N upper triangular factor of the block reflector. */ +/* > The elements on and above the diagonal contain the block */ +/* > reflector T; the elements below the diagonal are not used. */ +/* > See below for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix V stores the elementary reflectors H(i) in the i-th column */ +/* > below the diagonal. For example, if M=5 and N=3, the matrix V is */ +/* > */ +/* > V = ( 1 ) */ +/* > ( v1 1 ) */ +/* > ( v1 v2 1 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > where the vi's represent the vectors which define H(i), which are returned */ +/* > in the matrix A. The 1's along the diagonal of V are not stored in A. The */ +/* > block reflector H is then given by */ +/* > */ +/* > H = I - V * T * V**T */ +/* > */ +/* > where V**T is the transpose of V. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgeqrt2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *t, integer *ldt, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3; + + /* Local variables */ + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + integer i__, k; + doublereal alpha; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dtrmv_(char *, + char *, char *, integer *, doublereal *, integer *, doublereal *, + integer *), dlarfg_(integer *, doublereal + *, doublereal *, integer *, doublereal *), xerbla_(char *, + integer *, ftnlen); + doublereal aii; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*ldt < f2cmax(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQRT2", &i__1, (ftnlen)7); + return 0; + } + + k = f2cmin(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elem. refl. H(i) to annihilate A(i+1:m,i), tau(I) -> T(I,1) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * a_dim1] + , &c__1, &t[i__ + t_dim1]); + if (i__ < *n) { + +/* Apply H(i) to A(I:M,I+1:N) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + +/* W(1:N-I) := A(I:M,I+1:N)^H * A(I:M,I) [W = T(:,N)] */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dgemv_("T", &i__2, &i__3, &c_b5, &a[i__ + (i__ + 1) * a_dim1], + lda, &a[i__ + i__ * a_dim1], &c__1, &c_b7, &t[*n * t_dim1 + + 1], &c__1); + +/* A(I:M,I+1:N) = A(I:m,I+1:N) + alpha*A(I:M,I)*W(1:N-1)^H */ + + alpha = -t[i__ + t_dim1]; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dger_(&i__2, &i__3, &alpha, &a[i__ + i__ * a_dim1], &c__1, &t[*n * + t_dim1 + 1], &c__1, &a[i__ + (i__ + 1) * a_dim1], lda); + a[i__ + i__ * a_dim1] = aii; + } + } + + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + +/* T(1:I-1,I) := alpha * A(I:M,1:I-1)**T * A(I:M,I) */ + + alpha = -t[i__ + t_dim1]; + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_("T", &i__2, &i__3, &alpha, &a[i__ + a_dim1], lda, &a[i__ + i__ + * a_dim1], &c__1, &c_b7, &t[i__ * t_dim1 + 1], &c__1); + a[i__ + i__ * a_dim1] = aii; + +/* T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) */ + + i__2 = i__ - 1; + dtrmv_("U", "N", "N", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], + &c__1); + +/* T(I,I) = tau(I) */ + + t[i__ + i__ * t_dim1] = t[i__ + t_dim1]; + t[i__ + t_dim1] = 0.; + } + +/* End of DGEQRT2 */ + + return 0; +} /* dgeqrt2_ */ + diff --git a/lapack-netlib/SRC/dgeqrt3.c b/lapack-netlib/SRC/dgeqrt3.c new file mode 100644 index 000000000..f0f8299b8 --- /dev/null +++ b/lapack-netlib/SRC/dgeqrt3.c @@ -0,0 +1,681 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGEQRT3 recursively computes a QR factorization of a general real or complex matrix using the c +ompact WY representation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEQRT3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEQRT3( M, N, A, LDA, T, LDT, INFO ) */ + +/* INTEGER INFO, LDA, M, N, LDT */ +/* DOUBLE PRECISION A( LDA, * ), T( LDT, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEQRT3 recursively computes a QR factorization of a real M-by-N */ +/* > matrix A, using the compact WY representation of Q. */ +/* > */ +/* > Based on the algorithm of Elmroth and Gustavson, */ +/* > IBM J. Res. Develop. Vol 44 No. 4 July 2000. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the real M-by-N matrix A. On exit, the elements on and */ +/* > above the diagonal contain the N-by-N upper triangular matrix R; the */ +/* > elements below the diagonal are the columns of V. See below for */ +/* > further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is DOUBLE PRECISION array, dimension (LDT,N) */ +/* > The N-by-N upper triangular factor of the block reflector. */ +/* > The elements on and above the diagonal contain the block */ +/* > reflector T; the elements below the diagonal are not used. */ +/* > See below for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix V stores the elementary reflectors H(i) in the i-th column */ +/* > below the diagonal. For example, if M=5 and N=3, the matrix V is */ +/* > */ +/* > V = ( 1 ) */ +/* > ( v1 1 ) */ +/* > ( v1 v2 1 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > where the vi's represent the vectors which define H(i), which are returned */ +/* > in the matrix A. The 1's along the diagonal of V are not stored in A. The */ +/* > block reflector H is then given by */ +/* > */ +/* > H = I - V * T * V**T */ +/* > */ +/* > where V**T is the transpose of V. */ +/* > */ +/* > For details of the algorithm, see Elmroth and Gustavson (cited above). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgeqrt3_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *t, integer *ldt, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2; + + /* Local variables */ + integer i__, j; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer iinfo; + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + integer i1, j1, n1, n2; + extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -2; + } else if (*m < *n) { + *info = -1; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*ldt < f2cmax(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQRT3", &i__1, (ftnlen)7); + return 0; + } + + if (*n == 1) { + +/* Compute Householder transform when N=1 */ + + dlarfg_(m, &a[a_dim1 + 1], &a[f2cmin(2,*m) + a_dim1], &c__1, &t[t_dim1 + + 1]); + + } else { + +/* Otherwise, split A into blocks... */ + + n1 = *n / 2; + n2 = *n - n1; +/* Computing MIN */ + i__1 = n1 + 1; + j1 = f2cmin(i__1,*n); +/* Computing MIN */ + i__1 = *n + 1; + i1 = f2cmin(i__1,*m); + +/* Compute A(1:M,1:N1) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H */ + + dgeqrt3_(m, &n1, &a[a_offset], lda, &t[t_offset], ldt, &iinfo); + +/* Compute A(1:M,J1:N) = Q1^H A(1:M,J1:N) [workspace: T(1:N1,J1:N)] */ + + i__1 = n2; + for (j = 1; j <= i__1; ++j) { + i__2 = n1; + for (i__ = 1; i__ <= i__2; ++i__) { + t[i__ + (j + n1) * t_dim1] = a[i__ + (j + n1) * a_dim1]; + } + } + dtrmm_("L", "L", "T", "U", &n1, &n2, &c_b8, &a[a_offset], lda, &t[j1 * + t_dim1 + 1], ldt) + ; + + i__1 = *m - n1; + dgemm_("T", "N", &n1, &n2, &i__1, &c_b8, &a[j1 + a_dim1], lda, &a[j1 + + j1 * a_dim1], lda, &c_b8, &t[j1 * t_dim1 + 1], ldt); + + dtrmm_("L", "U", "T", "N", &n1, &n2, &c_b8, &t[t_offset], ldt, &t[j1 * + t_dim1 + 1], ldt) + ; + + i__1 = *m - n1; + dgemm_("N", "N", &i__1, &n2, &n1, &c_b20, &a[j1 + a_dim1], lda, &t[j1 + * t_dim1 + 1], ldt, &c_b8, &a[j1 + j1 * a_dim1], lda); + + dtrmm_("L", "L", "N", "U", &n1, &n2, &c_b8, &a[a_offset], lda, &t[j1 * + t_dim1 + 1], ldt) + ; + + i__1 = n2; + for (j = 1; j <= i__1; ++j) { + i__2 = n1; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + (j + n1) * a_dim1] -= t[i__ + (j + n1) * t_dim1]; + } + } + +/* Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H */ + + i__1 = *m - n1; + dgeqrt3_(&i__1, &n2, &a[j1 + j1 * a_dim1], lda, &t[j1 + j1 * t_dim1], + ldt, &iinfo); + +/* Compute T3 = T(1:N1,J1:N) = -T1 Y1^H Y2 T2 */ + + i__1 = n1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = n2; + for (j = 1; j <= i__2; ++j) { + t[i__ + (j + n1) * t_dim1] = a[j + n1 + i__ * a_dim1]; + } + } + + dtrmm_("R", "L", "N", "U", &n1, &n2, &c_b8, &a[j1 + j1 * a_dim1], lda, + &t[j1 * t_dim1 + 1], ldt); + + i__1 = *m - *n; + dgemm_("T", "N", &n1, &n2, &i__1, &c_b8, &a[i1 + a_dim1], lda, &a[i1 + + j1 * a_dim1], lda, &c_b8, &t[j1 * t_dim1 + 1], ldt); + + dtrmm_("L", "U", "N", "N", &n1, &n2, &c_b20, &t[t_offset], ldt, &t[j1 + * t_dim1 + 1], ldt); + + dtrmm_("R", "U", "N", "N", &n1, &n2, &c_b8, &t[j1 + j1 * t_dim1], ldt, + &t[j1 * t_dim1 + 1], ldt); + +/* Y = (Y1,Y2); R = [ R1 A(1:N1,J1:N) ]; T = [T1 T3] */ +/* [ 0 R2 ] [ 0 T2] */ + + } + + return 0; + +/* End of DGEQRT3 */ + +} /* dgeqrt3_ */ + diff --git a/lapack-netlib/SRC/dgerfs.c b/lapack-netlib/SRC/dgerfs.c new file mode 100644 index 000000000..83be100ab --- /dev/null +++ b/lapack-netlib/SRC/dgerfs.c @@ -0,0 +1,882 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGERFS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGERFS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, */ +/* X, LDX, FERR, BERR, WORK, IWORK, INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGERFS improves the computed solution to a system of linear */ +/* > equations and provides error bounds and backward error estimates for */ +/* > the solution. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The original N-by-N matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AF */ +/* > \verbatim */ +/* > AF is DOUBLE PRECISION array, dimension (LDAF,N) */ +/* > The factors L and U from the factorization A = P*L*U */ +/* > as computed by DGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from DGETRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > The right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* > On entry, the solution matrix X, as computed by DGETRS. */ +/* > On exit, the improved solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (3*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > ITMAX is the maximum number of steps of iterative refinement. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgerfs_(char *trans, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * + ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, + doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2, i__3; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer kase; + doublereal safe1, safe2; + integer i__, j, k; + doublereal s; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + integer isave[3]; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), daxpy_(integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *); + integer count; + extern /* Subroutine */ int dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal dlamch_(char *); + doublereal xk; + integer nz; + doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dgetrs_( + char *, integer *, integer *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *); + logical notran; + char transt[1]; + doublereal lstres, eps; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -10; + } else if (*ldx < f2cmax(1,*n)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGERFS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = *n + 1; + eps = dlamch_("Epsilon"); + safmin = dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - op(A) * X, */ +/* where op(A) = A, A**T, or A**H, depending on TRANS. */ + + dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + dgemv_(trans, n, n, &c_b15, &a[a_offset], lda, &x[j * x_dim1 + 1], & + c__1, &c_b17, &work[*n + 1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* f2cmax(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); +/* L30: */ + } + +/* Compute abs(op(A))*abs(X) + abs(B). */ + + if (notran) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (d__1 = x[k + j * x_dim1], abs(d__1)); + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * xk; +/* L40: */ + } +/* L50: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = x[ + i__ + j * x_dim1], abs(d__2)); +/* L60: */ + } + work[k] += s; +/* L70: */ + } + } + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ + i__]; + s = f2cmax(d__2,d__3); + } else { +/* Computing MAX */ + d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) + / (work[i__] + safe1); + s = f2cmax(d__2,d__3); + } +/* L80: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { + +/* Update solution and try again. */ + + dgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[*n + + 1], n, info); + daxpy_(n, &c_b17, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) + ; + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(op(A)))* */ +/* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(op(A)) is the inverse of op(A) */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ + +/* Use DLACN2 to estimate the infinity-norm of the matrix */ +/* inv(op(A)) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * + work[i__] + safe1; + } +/* L90: */ + } + + kase = 0; +L100: + dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(op(A)**T). */ + + dgetrs_(transt, n, &c__1, &af[af_offset], ldaf, &ipiv[1], & + work[*n + 1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L110: */ + } + } else { + +/* Multiply by inv(op(A))*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L120: */ + } + dgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], & + work[*n + 1], n, info); + } + goto L100; + } + +/* Normalize error. */ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1)); + lstres = f2cmax(d__2,d__3); +/* L130: */ + } + if (lstres != 0.) { + ferr[j] /= lstres; + } + +/* L140: */ + } + + return 0; + +/* End of DGERFS */ + +} /* dgerfs_ */ + diff --git a/lapack-netlib/SRC/dgerfsx.c b/lapack-netlib/SRC/dgerfsx.c new file mode 100644 index 000000000..7610f487e --- /dev/null +++ b/lapack-netlib/SRC/dgerfsx.c @@ -0,0 +1,1148 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGERFSX */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGERFSX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, */ +/* R, C, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, */ +/* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, */ +/* WORK, IWORK, INFO ) */ + +/* CHARACTER TRANS, EQUED */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, */ +/* $ N_ERR_BNDS */ +/* DOUBLE PRECISION RCOND */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ X( LDX , * ), WORK( * ) */ +/* DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), */ +/* $ ERR_BNDS_NORM( NRHS, * ), */ +/* $ ERR_BNDS_COMP( NRHS, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGERFSX improves the computed solution to a system of linear */ +/* > equations and provides error bounds and backward error estimates */ +/* > for the solution. In addition to normwise error bound, the code */ +/* > provides maximum componentwise error bound if possible. See */ +/* > comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the */ +/* > error bounds. */ +/* > */ +/* > The original system of linear equations may have been equilibrated */ +/* > before calling this routine, as described by arguments EQUED, R */ +/* > and C below. In this case, the solution and error bounds returned */ +/* > are for the original unequilibrated system. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \verbatim */ +/* > Some optional parameters are bundled in the PARAMS array. These */ +/* > settings determine how refinement is performed, but often the */ +/* > defaults are acceptable. If the defaults are acceptable, users */ +/* > can pass NPARAMS = 0 which prevents the source code from accessing */ +/* > the PARAMS argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] EQUED */ +/* > \verbatim */ +/* > EQUED is CHARACTER*1 */ +/* > Specifies the form of equilibration that was done to A */ +/* > before calling this routine. This is needed to compute */ +/* > the solution and error bounds correctly. */ +/* > = 'N': No equilibration */ +/* > = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* > diag(R). */ +/* > = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* > by diag(C). */ +/* > = 'B': Both row and column equilibration, i.e., A has been */ +/* > replaced by diag(R) * A * diag(C). */ +/* > The right hand side B has been changed accordingly. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The original N-by-N matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AF */ +/* > \verbatim */ +/* > AF is DOUBLE PRECISION array, dimension (LDAF,N) */ +/* > The factors L and U from the factorization A = P*L*U */ +/* > as computed by DGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from DGETRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] R */ +/* > \verbatim */ +/* > R is DOUBLE PRECISION array, dimension (N) */ +/* > The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* > multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* > is not accessed. */ +/* > If R is accessed, each element of R should be a power of the radix */ +/* > to ensure a reliable solution and error estimates. Scaling by */ +/* > powers of the radix does not cause rounding errors unless the */ +/* > result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (N) */ +/* > The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* > multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* > is not accessed. */ +/* > If C is accessed, each element of C should be a power of the radix */ +/* > to ensure a reliable solution and error estimates. Scaling by */ +/* > powers of the radix does not cause rounding errors unless the */ +/* > result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > The right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* > On entry, the solution matrix X, as computed by DGETRS. */ +/* > On exit, the improved solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > Reciprocal scaled condition number. This is an estimate of the */ +/* > reciprocal Skeel condition number of the matrix A after */ +/* > equilibration (if done). If this is less than the machine */ +/* > precision (in particular, if it is zero), the matrix is singular */ +/* > to working precision. Note that the error may still be small even */ +/* > if this number is very small and the matrix appears ill- */ +/* > conditioned. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > Componentwise relative backward error. This is the */ +/* > componentwise relative backward error of each solution vector X(j) */ +/* > (i.e., the smallest relative change in any element of A or B that */ +/* > makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N_ERR_BNDS */ +/* > \verbatim */ +/* > N_ERR_BNDS is INTEGER */ +/* > Number of error bounds to return for each right hand side */ +/* > and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* > ERR_BNDS_COMP below. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_NORM */ +/* > \verbatim */ +/* > ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > normwise relative error, which is defined as follows: */ +/* > */ +/* > Normwise relative error in the ith solution vector: */ +/* > max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* > ------------------------------ */ +/* > max_j abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the type of error information as described */ +/* > below. There currently are up to three pieces of information */ +/* > returned. */ +/* > */ +/* > The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated normwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*A, where S scales each row by a power of the */ +/* > radix so all absolute row sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_COMP */ +/* > \verbatim */ +/* > ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > componentwise relative error, which is defined as follows: */ +/* > */ +/* > Componentwise relative error in the ith solution vector: */ +/* > abs(XTRUE(j,i) - X(j,i)) */ +/* > max_j ---------------------- */ +/* > abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the right-hand side i (on which the */ +/* > componentwise relative error depends), and the type of error */ +/* > information as described below. There currently are up to three */ +/* > pieces of information returned for each right-hand side. If */ +/* > componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* > ERR_BNDS_COMP is not accessed. If N_ERR_BNDS < 3, then at most */ +/* > the first (:,N_ERR_BNDS) entries are returned. */ +/* > */ +/* > The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated componentwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*(A*diag(x)), where x is the solution for the */ +/* > current right-hand side and S scales each row of */ +/* > A*diag(x) by a power of the radix so all absolute row */ +/* > sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NPARAMS */ +/* > \verbatim */ +/* > NPARAMS is INTEGER */ +/* > Specifies the number of parameters set in PARAMS. If <= 0, the */ +/* > PARAMS array is never referenced and default values are used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] PARAMS */ +/* > \verbatim */ +/* > PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) */ +/* > Specifies algorithm parameters. If an entry is < 0.0, then */ +/* > that entry will be filled with default value used for that */ +/* > parameter. Only positions up to NPARAMS are accessed; defaults */ +/* > are used for higher-numbered parameters. */ +/* > */ +/* > PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* > refinement or not. */ +/* > Default: 1.0D+0 */ +/* > = 0.0: No refinement is performed, and no error bounds are */ +/* > computed. */ +/* > = 1.0: Use the double-precision refinement algorithm, */ +/* > possibly with doubled-single computations if the */ +/* > compilation environment does not support DOUBLE */ +/* > PRECISION. */ +/* > (other values are reserved for future use) */ +/* > */ +/* > PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* > computations allowed for refinement. */ +/* > Default: 10 */ +/* > Aggressive: Set to 100 to permit convergence using approximate */ +/* > factorizations or factorizations other than LU. If */ +/* > the factorization uses a technique other than */ +/* > Gaussian elimination, the guarantees in */ +/* > err_bnds_norm and err_bnds_comp may no longer be */ +/* > trustworthy. */ +/* > */ +/* > PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* > will attempt to find a solution with small componentwise */ +/* > relative error in the double-precision algorithm. Positive */ +/* > is true, 0.0 is false. */ +/* > Default: 1.0 (attempt componentwise convergence) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (4*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: Successful exit. The solution to every right-hand side is */ +/* > guaranteed. */ +/* > < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly singular, so */ +/* > the solution and error bounds could not be computed. RCOND = 0 */ +/* > is returned. */ +/* > = N+J: The solution corresponding to the Jth right-hand side is */ +/* > not guaranteed. The solutions corresponding to other right- */ +/* > hand sides K with K > J may not be guaranteed as well, but */ +/* > only the first such right-hand side is reported. If a small */ +/* > componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* > the Jth right-hand side is the first with a normwise error */ +/* > bound that is not guaranteed (the smallest J such */ +/* > that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ +/* > the Jth right-hand side is the first with either a normwise or */ +/* > componentwise error bound that is not guaranteed (the smallest */ +/* > J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* > ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* > ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* > about all of the right-hand sides check ERR_BNDS_NORM or */ +/* > ERR_BNDS_COMP. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgerfsx_(char *trans, char *equed, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, doublereal *r__, doublereal *c__, doublereal *b, + integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, + doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, + doublereal *err_bnds_comp__, integer *nparams, doublereal *params, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + doublereal illrcond_thresh__, unstable_thresh__, err_lbnd__; + char norm[1]; + integer ref_type__; + extern integer ilatrans_(char *); + logical ignore_cwise__; + integer j; + extern logical lsame_(char *, char *); + doublereal anorm, rcond_tmp__; + integer prec_type__; + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dgecon_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + integer *), xerbla_(char *, integer *, ftnlen); + logical colequ, notran, rowequ; + integer trans_type__; + extern integer ilaprec_(char *); + extern doublereal dla_gercond_(char *, integer *, doublereal *, integer * + , doublereal *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + integer ithresh, n_norms__; + doublereal rthresh, cwise_wrong__; + extern /* Subroutine */ int dla_gerfsx_extended_(integer *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, logical *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, logical *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ================================================================== */ + + +/* Check the input parameters. */ + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1; + err_bnds_norm__ -= err_bnds_norm_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --berr; + --params; + --work; + --iwork; + + /* Function Body */ + *info = 0; + trans_type__ = ilatrans_(trans); + ref_type__ = 1; + if (*nparams >= 1) { + if (params[1] < 0.) { + params[1] = 1.; + } else { + ref_type__ = (integer) params[1]; + } + } + +/* Set default parameters. */ + + illrcond_thresh__ = (doublereal) (*n) * dlamch_("Epsilon"); + ithresh = 10; + rthresh = .5; + unstable_thresh__ = .25; + ignore_cwise__ = FALSE_; + + if (*nparams >= 2) { + if (params[2] < 0.) { + params[2] = (doublereal) ithresh; + } else { + ithresh = (integer) params[2]; + } + } + if (*nparams >= 3) { + if (params[3] < 0.) { + if (ignore_cwise__) { + params[3] = 0.; + } else { + params[3] = 1.; + } + } else { + ignore_cwise__ = params[3] == 0.; + } + } + if (ref_type__ == 0 || *n_err_bnds__ == 0) { + n_norms__ = 0; + } else if (ignore_cwise__) { + n_norms__ = 1; + } else { + n_norms__ = 2; + } + + notran = lsame_(trans, "N"); + rowequ = lsame_(equed, "R") || lsame_(equed, "B"); + colequ = lsame_(equed, "C") || lsame_(equed, "B"); + +/* Test input parameters. */ + + if (trans_type__ == -1) { + *info = -1; + } else if (! rowequ && ! colequ && ! lsame_(equed, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -8; + } else if (*ldb < f2cmax(1,*n)) { + *info = -13; + } else if (*ldx < f2cmax(1,*n)) { + *info = -15; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGERFSX", &i__1, (ftnlen)7); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *nrhs == 0) { + *rcond = 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + berr[j] = 0.; + if (*n_err_bnds__ >= 1) { + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } + if (*n_err_bnds__ >= 2) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 0.; + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 0.; + } + if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 1.; + } + } + return 0; + } + +/* Default to failure. */ + + *rcond = 0.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + berr[j] = 1.; + if (*n_err_bnds__ >= 1) { + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } + if (*n_err_bnds__ >= 2) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + } + if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 0.; + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 0.; + } + } + +/* Compute the norm of A and the reciprocal of the condition */ +/* number of A. */ + + if (notran) { + *(unsigned char *)norm = 'I'; + } else { + *(unsigned char *)norm = '1'; + } + anorm = dlange_(norm, n, n, &a[a_offset], lda, &work[1]); + dgecon_(norm, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1], + info); + +/* Perform refinement on each right-hand side */ + + if (ref_type__ != 0) { + prec_type__ = ilaprec_("E"); + if (notran) { + dla_gerfsx_extended_(&prec_type__, &trans_type__, n, nrhs, &a[ + a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &colequ, & + c__[1], &b[b_offset], ldb, &x[x_offset], ldx, &berr[1], & + n_norms__, &err_bnds_norm__[err_bnds_norm_offset], & + err_bnds_comp__[err_bnds_comp_offset], &work[*n + 1], & + work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh, + &rthresh, &unstable_thresh__, &ignore_cwise__, info); + } else { + dla_gerfsx_extended_(&prec_type__, &trans_type__, n, nrhs, &a[ + a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &rowequ, & + r__[1], &b[b_offset], ldb, &x[x_offset], ldx, &berr[1], & + n_norms__, &err_bnds_norm__[err_bnds_norm_offset], & + err_bnds_comp__[err_bnds_comp_offset], &work[*n + 1], & + work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh, + &rthresh, &unstable_thresh__, &ignore_cwise__, info); + } + } +/* Computing MAX */ + d__1 = 10., d__2 = sqrt((doublereal) (*n)); + err_lbnd__ = f2cmax(d__1,d__2) * dlamch_("Epsilon"); + if (*n_err_bnds__ >= 1 && n_norms__ >= 1) { + +/* Compute scaled normwise condition number cond(A*C). */ + + if (colequ && notran) { + rcond_tmp__ = dla_gercond_(trans, n, &a[a_offset], lda, &af[ + af_offset], ldaf, &ipiv[1], &c_n1, &c__[1], info, &work[1] + , &iwork[1]); + } else if (rowequ && ! notran) { + rcond_tmp__ = dla_gercond_(trans, n, &a[a_offset], lda, &af[ + af_offset], ldaf, &ipiv[1], &c_n1, &r__[1], info, &work[1] + , &iwork[1]); + } else { + rcond_tmp__ = dla_gercond_(trans, n, &a[a_offset], lda, &af[ + af_offset], ldaf, &ipiv[1], &c__0, &r__[1], info, &work[1] + , &iwork[1]); + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Cap the error at 1.0. */ + + if (*n_err_bnds__ >= 2 && err_bnds_norm__[j + (err_bnds_norm_dim1 + << 1)] > 1.) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + } + +/* Threshold the error (see LAWN). */ + + if (rcond_tmp__ < illrcond_thresh__) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.; + err_bnds_norm__[j + err_bnds_norm_dim1] = 0.; + if (*info <= *n) { + *info = *n + j; + } + } else if (err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] < + err_lbnd__) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = err_lbnd__; + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.; + } + +/* Save the condition number. */ + + if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = rcond_tmp__; + } + } + } + if (*n_err_bnds__ >= 1 && n_norms__ >= 2) { + +/* Compute componentwise condition number cond(A*diag(Y(:,J))) for */ +/* each right-hand side using the current solution as an estimate of */ +/* the true solution. If the componentwise error estimate is too */ +/* large, then the solution is a lousy estimate of truth and the */ +/* estimated RCOND may be too optimistic. To avoid misleading users, */ +/* the inverse condition number is set to 0.0 when the estimated */ +/* cwise error is at least CWISE_WRONG. */ + + cwise_wrong__ = sqrt(dlamch_("Epsilon")); + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < + cwise_wrong__) { + rcond_tmp__ = dla_gercond_(trans, n, &a[a_offset], lda, &af[ + af_offset], ldaf, &ipiv[1], &c__1, &x[j * x_dim1 + 1], + info, &work[1], &iwork[1]); + } else { + rcond_tmp__ = 0.; + } + +/* Cap the error at 1.0. */ + + if (*n_err_bnds__ >= 2 && err_bnds_comp__[j + (err_bnds_comp_dim1 + << 1)] > 1.) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + } + +/* Threshold the error (see LAWN). */ + + if (rcond_tmp__ < illrcond_thresh__) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.; + err_bnds_comp__[j + err_bnds_comp_dim1] = 0.; + if (params[3] == 1. && *info < *n + j) { + *info = *n + j; + } + } else if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < + err_lbnd__) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = err_lbnd__; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.; + } + +/* Save the condition number. */ + + if (*n_err_bnds__ >= 3) { + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = rcond_tmp__; + } + } + } + + return 0; + +/* End of DGERFSX */ + +} /* dgerfsx_ */ + diff --git a/lapack-netlib/SRC/dgerq2.c b/lapack-netlib/SRC/dgerq2.c new file mode 100644 index 000000000..0044467b9 --- /dev/null +++ b/lapack-netlib/SRC/dgerq2.c @@ -0,0 +1,587 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGERQ2 computes the RQ factorization of a general rectangular matrix using an unblocked algorit +hm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGERQ2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGERQ2( M, N, A, LDA, TAU, WORK, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGERQ2 computes an RQ factorization of a real m by n matrix A: */ +/* > A = R * Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the m by n matrix A. */ +/* > On exit, if m <= n, the upper triangle of the subarray */ +/* > A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; */ +/* > if m >= n, the elements on and above the (m-n)-th subdiagonal */ +/* > contain the m by n upper trapezoidal matrix R; the remaining */ +/* > elements, with the array TAU, represent the orthogonal matrix */ +/* > Q as a product of elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */ +/* > A(m-k+i,1:n-k+i-1), and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgerq2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, k; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen); + doublereal aii; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGERQ2", &i__1, (ftnlen)6); + return 0; + } + + k = f2cmin(*m,*n); + + for (i__ = k; i__ >= 1; --i__) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(m-k+i,1:n-k+i-1) */ + + i__1 = *n - k + i__; + dlarfg_(&i__1, &a[*m - k + i__ + (*n - k + i__) * a_dim1], &a[*m - k + + i__ + a_dim1], lda, &tau[i__]); + +/* Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right */ + + aii = a[*m - k + i__ + (*n - k + i__) * a_dim1]; + a[*m - k + i__ + (*n - k + i__) * a_dim1] = 1.; + i__1 = *m - k + i__ - 1; + i__2 = *n - k + i__; + dlarf_("Right", &i__1, &i__2, &a[*m - k + i__ + a_dim1], lda, &tau[ + i__], &a[a_offset], lda, &work[1]); + a[*m - k + i__ + (*n - k + i__) * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DGERQ2 */ + +} /* dgerq2_ */ + diff --git a/lapack-netlib/SRC/dgerqf.c b/lapack-netlib/SRC/dgerqf.c new file mode 100644 index 000000000..d0ec2af49 --- /dev/null +++ b/lapack-netlib/SRC/dgerqf.c @@ -0,0 +1,710 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGERQF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGERQF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGERQF computes an RQ factorization of a real M-by-N matrix A: */ +/* > A = R * Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if m <= n, the upper triangle of the subarray */ +/* > A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; */ +/* > if m >= n, the elements on and above the (m-n)-th subdiagonal */ +/* > contain the M-by-N upper trapezoidal matrix R; */ +/* > the remaining elements, with the array TAU, represent the */ +/* > orthogonal matrix Q as a product of f2cmin(m,n) elementary */ +/* > reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,M). */ +/* > For optimum performance LWORK >= M*NB, where NB is */ +/* > the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */ +/* > A(m-k+i,1:n-k+i-1), and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgerqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, nbmin, iinfo; + extern /* Subroutine */ int dgerq2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer ib, nb, ki, kk; + extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + integer mu, nu, nx; + extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + integer iws; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + + if (*info == 0) { + k = f2cmin(*m,*n); + if (k == 0) { + lwkopt = 1; + } else { + nb = ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *m * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < f2cmax(1,*m) && ! lquery) { + *info = -7; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGERQF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (k == 0) { + return 0; + } + + nbmin = 2; + nx = 1; + iws = *m; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "DGERQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGERQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially. */ +/* The last kk rows are handled by the block method. */ + + ki = (k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = k, i__2 = ki + nb; + kk = f2cmin(i__1,i__2); + + i__1 = k - kk + 1; + i__2 = -nb; + for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ + += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,nb); + +/* Compute the RQ factorization of the current block */ +/* A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) */ + + i__3 = *n - k + i__ + ib - 1; + dgerq2_(&ib, &i__3, &a[*m - k + i__ + a_dim1], lda, &tau[i__], & + work[1], &iinfo); + if (*m - k + i__ > 1) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . H(i+1) H(i) */ + + i__3 = *n - k + i__ + ib - 1; + dlarft_("Backward", "Rowwise", &i__3, &ib, &a[*m - k + i__ + + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right */ + + i__3 = *m - k + i__ - 1; + i__4 = *n - k + i__ + ib - 1; + dlarfb_("Right", "No transpose", "Backward", "Rowwise", &i__3, + &i__4, &ib, &a[*m - k + i__ + a_dim1], lda, &work[1], + &ldwork, &a[a_offset], lda, &work[ib + 1], &ldwork); + } +/* L10: */ + } + mu = *m - k + i__ + nb - 1; + nu = *n - k + i__ + nb - 1; + } else { + mu = *m; + nu = *n; + } + +/* Use unblocked code to factor the last or only block */ + + if (mu > 0 && nu > 0) { + dgerq2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGERQF */ + +} /* dgerqf_ */ + diff --git a/lapack-netlib/SRC/dgesc2.c b/lapack-netlib/SRC/dgesc2.c new file mode 100644 index 000000000..a2a1aac42 --- /dev/null +++ b/lapack-netlib/SRC/dgesc2.c @@ -0,0 +1,604 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGESC2 solves a system of linear equations using the LU factorization with complete pivoting co +mputed by sgetc2. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGESC2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) */ + +/* INTEGER LDA, N */ +/* DOUBLE PRECISION SCALE */ +/* INTEGER IPIV( * ), JPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ), RHS( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGESC2 solves a system of linear equations */ +/* > */ +/* > A * X = scale* RHS */ +/* > */ +/* > with a general N-by-N matrix A using the LU factorization with */ +/* > complete pivoting computed by DGETC2. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the LU part of the factorization of the n-by-n */ +/* > matrix A computed by DGETC2: A = P * L * U * Q */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1, N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] RHS */ +/* > \verbatim */ +/* > RHS is DOUBLE PRECISION array, dimension (N). */ +/* > On entry, the right hand side vector b. */ +/* > On exit, the solution vector X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N). */ +/* > The pivot indices; for 1 <= i <= N, row i of the */ +/* > matrix has been interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JPIV */ +/* > \verbatim */ +/* > JPIV is INTEGER array, dimension (N). */ +/* > The pivot indices; for 1 <= j <= N, column j of the */ +/* > matrix has been interchanged with column JPIV(j). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is DOUBLE PRECISION */ +/* > On exit, SCALE contains the scale factor. SCALE is chosen */ +/* > 0 <= SCALE <= 1 to prevent overflow in the solution. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup doubleGEauxiliary */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* > Umea University, S-901 87 Umea, Sweden. */ + +/* ===================================================================== */ +/* Subroutine */ int dgesc2_(integer *n, doublereal *a, integer *lda, + doublereal *rhs, integer *ipiv, integer *jpiv, doublereal *scale) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + doublereal temp; + integer i__, j; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *); + extern integer idamax_(integer *, doublereal *, integer *); + doublereal bignum; + extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *); + doublereal smlnum, eps; + + +/* -- LAPACK auxiliary routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Set constant to control overflow */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --rhs; + --ipiv; + --jpiv; + + /* Function Body */ + eps = dlamch_("P"); + smlnum = dlamch_("S") / eps; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Apply permutations IPIV to RHS */ + + i__1 = *n - 1; + dlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &ipiv[1], &c__1); + +/* Solve for L part */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + rhs[j] -= a[j + i__ * a_dim1] * rhs[i__]; +/* L10: */ + } +/* L20: */ + } + +/* Solve for U part */ + + *scale = 1.; + +/* Check for scaling */ + + i__ = idamax_(n, &rhs[1], &c__1); + if (smlnum * 2. * (d__1 = rhs[i__], abs(d__1)) > (d__2 = a[*n + *n * + a_dim1], abs(d__2))) { + temp = .5 / (d__1 = rhs[i__], abs(d__1)); + dscal_(n, &temp, &rhs[1], &c__1); + *scale *= temp; + } + + for (i__ = *n; i__ >= 1; --i__) { + temp = 1. / a[i__ + i__ * a_dim1]; + rhs[i__] *= temp; + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + rhs[i__] -= rhs[j] * (a[i__ + j * a_dim1] * temp); +/* L30: */ + } +/* L40: */ + } + +/* Apply permutations JPIV to the solution (RHS) */ + + i__1 = *n - 1; + dlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &jpiv[1], &c_n1); + return 0; + +/* End of DGESC2 */ + +} /* dgesc2_ */ + diff --git a/lapack-netlib/SRC/dgesdd.c b/lapack-netlib/SRC/dgesdd.c new file mode 100644 index 000000000..f831c97b0 --- /dev/null +++ b/lapack-netlib/SRC/dgesdd.c @@ -0,0 +1,2167 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGESDD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGESDD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, */ +/* WORK, LWORK, IWORK, INFO ) */ + +/* CHARACTER JOBZ */ +/* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), */ +/* $ VT( LDVT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGESDD computes the singular value decomposition (SVD) of a real */ +/* > M-by-N matrix A, optionally computing the left and right singular */ +/* > vectors. If singular vectors are desired, it uses a */ +/* > divide-and-conquer algorithm. */ +/* > */ +/* > The SVD is written */ +/* > */ +/* > A = U * SIGMA * transpose(V) */ +/* > */ +/* > where SIGMA is an M-by-N matrix which is zero except for its */ +/* > f2cmin(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */ +/* > V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA */ +/* > are the singular values of A; they are real and non-negative, and */ +/* > are returned in descending order. The first f2cmin(m,n) columns of */ +/* > U and V are the left and right singular vectors of A. */ +/* > */ +/* > Note that the routine returns VT = V**T, not V. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix U: */ +/* > = 'A': all M columns of U and all N rows of V**T are */ +/* > returned in the arrays U and VT; */ +/* > = 'S': the first f2cmin(M,N) columns of U and the first */ +/* > f2cmin(M,N) rows of V**T are returned in the arrays U */ +/* > and VT; */ +/* > = 'O': If M >= N, the first N columns of U are overwritten */ +/* > on the array A and all rows of V**T are returned in */ +/* > the array VT; */ +/* > otherwise, all columns of U are returned in the */ +/* > array U and the first M rows of V**T are overwritten */ +/* > in the array A; */ +/* > = 'N': no columns of U or rows of V**T are computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if JOBZ = 'O', A is overwritten with the first N columns */ +/* > of U (the left singular vectors, stored */ +/* > columnwise) if M >= N; */ +/* > A is overwritten with the first M rows */ +/* > of V**T (the right singular vectors, stored */ +/* > rowwise) otherwise. */ +/* > if JOBZ .ne. 'O', the contents of A are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The singular values of A, sorted so that S(i) >= S(i+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is DOUBLE PRECISION array, dimension (LDU,UCOL) */ +/* > UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; */ +/* > UCOL = f2cmin(M,N) if JOBZ = 'S'. */ +/* > If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M */ +/* > orthogonal matrix U; */ +/* > if JOBZ = 'S', U contains the first f2cmin(M,N) columns of U */ +/* > (the left singular vectors, stored columnwise); */ +/* > if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= 1; if */ +/* > JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VT */ +/* > \verbatim */ +/* > VT is DOUBLE PRECISION array, dimension (LDVT,N) */ +/* > If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the */ +/* > N-by-N orthogonal matrix V**T; */ +/* > if JOBZ = 'S', VT contains the first f2cmin(M,N) rows of */ +/* > V**T (the right singular vectors, stored rowwise); */ +/* > if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER */ +/* > The leading dimension of the array VT. LDVT >= 1; */ +/* > if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; */ +/* > if JOBZ = 'S', LDVT >= f2cmin(M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= 1. */ +/* > If LWORK = -1, a workspace query is assumed. The optimal */ +/* > size for the WORK array is calculated and stored in WORK(1), */ +/* > and no other work except argument checking is performed. */ +/* > */ +/* > Let mx = f2cmax(M,N) and mn = f2cmin(M,N). */ +/* > If JOBZ = 'N', LWORK >= 3*mn + f2cmax( mx, 7*mn ). */ +/* > If JOBZ = 'O', LWORK >= 3*mn + f2cmax( mx, 5*mn*mn + 4*mn ). */ +/* > If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn. */ +/* > If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx. */ +/* > These are not tight minimums in all cases; see comments inside code. */ +/* > For good performance, LWORK should generally be larger; */ +/* > a query is recommended. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (8*f2cmin(M,N)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: DBDSDC did not converge, updating process failed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup doubleGEsing */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Huan Ren, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgesdd_(char *jobz, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *s, doublereal *u, integer *ldu, + doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Local variables */ + integer lwork_dorglq_mn__, lwork_dorglq_nn__, lwork_dorgqr_mm__, + lwork_dorgqr_mn__, iscl; + doublereal anrm; + integer idum[1], ierr, itau, lwork_dormbr_qln_mm__, lwork_dormbr_qln_mn__, + lwork_dormbr_qln_nn__, lwork_dormbr_prt_mm__, + lwork_dormbr_prt_mn__, lwork_dormbr_prt_nn__, i__; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + integer chunk, minmn, wrkbl, itaup, itauq, mnthr; + logical wntqa; + integer nwork; + logical wntqn, wntqo, wntqs; + integer ie, lwork_dorgbr_p_mm__; + extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *); + integer il, lwork_dorgbr_q_nn__; + extern /* Subroutine */ int dgebrd_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + extern doublereal dlamch_(char *); + integer ir, bdspac; + extern doublereal dlange_(char *, integer *, integer *, doublereal *, + integer *, doublereal *); + integer iu; + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), dlacpy_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *, ftnlen), dorgbr_(char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *); + extern logical disnan_(doublereal *); + doublereal bignum; + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), dorglq_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; + doublereal smlnum; + logical wntqas, lquery; + integer blk; + doublereal dum[1], eps; + integer ivt, lwork_dgebrd_mm__, lwork_dgebrd_mn__, lwork_dgebrd_nn__, + lwork_dgelqf_mn__, lwork_dgeqrf_mn__; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1 * 1; + vt -= vt_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = f2cmin(*m,*n); + wntqa = lsame_(jobz, "A"); + wntqs = lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, "O"); + wntqn = lsame_(jobz, "N"); + lquery = *lwork == -1; + + if (! (wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + +/* Compute workspace */ +/* Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace allocated at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + bdspac = 0; + mnthr = (integer) (minmn * 11. / 6.); + if (*m >= *n && minmn > 0) { + +/* Compute space needed for DBDSDC */ + + if (wntqn) { +/* dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6) */ +/* keep 7*N for backwards compatibility. */ + bdspac = *n * 7; + } else { + bdspac = *n * 3 * *n + (*n << 2); + } + +/* Compute space preferred for each routine */ + dgebrd_(m, n, dum, m, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_mn__ = (integer) dum[0]; + + dgebrd_(n, n, dum, n, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_nn__ = (integer) dum[0]; + + dgeqrf_(m, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_dgeqrf_mn__ = (integer) dum[0]; + + dorgbr_("Q", n, n, n, dum, n, dum, dum, &c_n1, &ierr); + lwork_dorgbr_q_nn__ = (integer) dum[0]; + + dorgqr_(m, m, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_dorgqr_mm__ = (integer) dum[0]; + + dorgqr_(m, n, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_dorgqr_mn__ = (integer) dum[0]; + + dormbr_("P", "R", "T", n, n, n, dum, n, dum, dum, n, dum, &c_n1, & + ierr); + lwork_dormbr_prt_nn__ = (integer) dum[0]; + + dormbr_("Q", "L", "N", n, n, n, dum, n, dum, dum, n, dum, &c_n1, & + ierr); + lwork_dormbr_qln_nn__ = (integer) dum[0]; + + dormbr_("Q", "L", "N", m, n, n, dum, m, dum, dum, m, dum, &c_n1, & + ierr); + lwork_dormbr_qln_mn__ = (integer) dum[0]; + + dormbr_("Q", "L", "N", m, m, n, dum, m, dum, dum, m, dum, &c_n1, & + ierr); + lwork_dormbr_qln_mm__ = (integer) dum[0]; + + if (*m >= mnthr) { + if (wntqn) { + +/* Path 1 (M >> N, JOBZ='N') */ + + wrkbl = *n + lwork_dgeqrf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n; + maxwrk = f2cmax(i__1,i__2); + minwrk = bdspac + *n; + } else if (wntqo) { + +/* Path 2 (M >> N, JOBZ='O') */ + + wrkbl = *n + lwork_dgeqrf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + (*n << 1) * *n; + minwrk = bdspac + (*n << 1) * *n + *n * 3; + } else if (wntqs) { + +/* Path 3 (M >> N, JOBZ='S') */ + + wrkbl = *n + lwork_dgeqrf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } else if (wntqa) { + +/* Path 4 (M >> N, JOBZ='A') */ + + wrkbl = *n + lwork_dgeqrf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + *n * *n; +/* Computing MAX */ + i__1 = *n * 3 + bdspac, i__2 = *n + *m; + minwrk = *n * *n + f2cmax(i__1,i__2); + } + } else { + +/* Path 5 (M >= N, but not much larger) */ + + wrkbl = *n * 3 + lwork_dgebrd_mn__; + if (wntqn) { +/* Path 5n (M >= N, jobz='N') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = f2cmax(i__1,i__2); + minwrk = *n * 3 + f2cmax(*m,bdspac); + } else if (wntqo) { +/* Path 5o (M >= N, jobz='O') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *m, i__2 = *n * *n + bdspac; + minwrk = *n * 3 + f2cmax(i__1,i__2); + } else if (wntqs) { +/* Path 5s (M >= N, jobz='S') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = f2cmax(i__1,i__2); + minwrk = *n * 3 + f2cmax(*m,bdspac); + } else if (wntqa) { +/* Path 5a (M >= N, jobz='A') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = f2cmax(i__1,i__2); + minwrk = *n * 3 + f2cmax(*m,bdspac); + } + } + } else if (minmn > 0) { + +/* Compute space needed for DBDSDC */ + + if (wntqn) { +/* dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6) */ +/* keep 7*N for backwards compatibility. */ + bdspac = *m * 7; + } else { + bdspac = *m * 3 * *m + (*m << 2); + } + +/* Compute space preferred for each routine */ + dgebrd_(m, n, dum, m, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_mn__ = (integer) dum[0]; + + dgebrd_(m, m, &a[a_offset], m, &s[1], dum, dum, dum, dum, &c_n1, & + ierr); + lwork_dgebrd_mm__ = (integer) dum[0]; + + dgelqf_(m, n, &a[a_offset], m, dum, dum, &c_n1, &ierr); + lwork_dgelqf_mn__ = (integer) dum[0]; + + dorglq_(n, n, m, dum, n, dum, dum, &c_n1, &ierr); + lwork_dorglq_nn__ = (integer) dum[0]; + + dorglq_(m, n, m, &a[a_offset], m, dum, dum, &c_n1, &ierr); + lwork_dorglq_mn__ = (integer) dum[0]; + + dorgbr_("P", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr); + lwork_dorgbr_p_mm__ = (integer) dum[0]; + + dormbr_("P", "R", "T", m, m, m, dum, m, dum, dum, m, dum, &c_n1, & + ierr); + lwork_dormbr_prt_mm__ = (integer) dum[0]; + + dormbr_("P", "R", "T", m, n, m, dum, m, dum, dum, m, dum, &c_n1, & + ierr); + lwork_dormbr_prt_mn__ = (integer) dum[0]; + + dormbr_("P", "R", "T", n, n, m, dum, n, dum, dum, n, dum, &c_n1, & + ierr); + lwork_dormbr_prt_nn__ = (integer) dum[0]; + + dormbr_("Q", "L", "N", m, m, m, dum, m, dum, dum, m, dum, &c_n1, & + ierr); + lwork_dormbr_qln_mm__ = (integer) dum[0]; + + if (*n >= mnthr) { + if (wntqn) { + +/* Path 1t (N >> M, JOBZ='N') */ + + wrkbl = *m + lwork_dgelqf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m; + maxwrk = f2cmax(i__1,i__2); + minwrk = bdspac + *m; + } else if (wntqo) { + +/* Path 2t (N >> M, JOBZ='O') */ + + wrkbl = *m + lwork_dgelqf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + lwork_dorglq_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + (*m << 1) * *m; + minwrk = bdspac + (*m << 1) * *m + *m * 3; + } else if (wntqs) { + +/* Path 3t (N >> M, JOBZ='S') */ + + wrkbl = *m + lwork_dgelqf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + lwork_dorglq_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } else if (wntqa) { + +/* Path 4t (N >> M, JOBZ='A') */ + + wrkbl = *m + lwork_dgelqf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + lwork_dorglq_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + *m * *m; +/* Computing MAX */ + i__1 = *m * 3 + bdspac, i__2 = *m + *n; + minwrk = *m * *m + f2cmax(i__1,i__2); + } + } else { + +/* Path 5t (N > M, but not much larger) */ + + wrkbl = *m * 3 + lwork_dgebrd_mn__; + if (wntqn) { +/* Path 5tn (N > M, jobz='N') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = f2cmax(i__1,i__2); + minwrk = *m * 3 + f2cmax(*n,bdspac); + } else if (wntqo) { +/* Path 5to (N > M, jobz='O') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *n, i__2 = *m * *m + bdspac; + minwrk = *m * 3 + f2cmax(i__1,i__2); + } else if (wntqs) { +/* Path 5ts (N > M, jobz='S') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = f2cmax(i__1,i__2); + minwrk = *m * 3 + f2cmax(*n,bdspac); + } else if (wntqa) { +/* Path 5ta (N > M, jobz='A') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = f2cmax(i__1,i__2); + minwrk = *m * 3 + f2cmax(*n,bdspac); + } + } + } + maxwrk = f2cmax(maxwrk,minwrk); + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGESDD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("P"); + smlnum = sqrt(dlamch_("S")) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, dum); + if (disnan_(&anrm)) { + *info = -4; + return 0; + } + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* A has at least as many rows as columns. If A has sufficiently */ +/* more rows than columns, first reduce using the QR */ +/* decomposition (if sufficient workspace available) */ + + if (*m >= mnthr) { + + if (wntqn) { + +/* Path 1 (M >> N, JOBZ='N') */ +/* No singular vectors to be computed */ + + itau = 1; + nwork = itau + *n; + +/* Compute A=Q*R */ +/* Workspace: need N [tau] + N [work] */ +/* Workspace: prefer N [tau] + N*NB [work] */ + + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out below R */ + + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_("L", &i__1, &i__2, &c_b63, &c_b63, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* Workspace: need 3*N [e, tauq, taup] + N [work] */ +/* Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work] */ + + i__1 = *lwork - nwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *n; + +/* Perform bidiagonal SVD, computing singular values only */ +/* Workspace: need N [e] + BDSPAC */ + + dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* Path 2 (M >> N, JOBZ = 'O') */ +/* N left singular vectors to be overwritten on A and */ +/* N right singular vectors to be computed in VT */ + + ir = 1; + +/* WORK(IR) is LDWRKR by N */ + + if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { + ldwrkr = *lda; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* Compute A=Q*R */ +/* Workspace: need N*N [R] + N [tau] + N [work] */ +/* Workspace: prefer N*N [R] + N [tau] + N*NB [work] */ + + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_("L", &i__1, &i__2, &c_b63, &c_b63, &work[ir + 1], & + ldwrkr); + +/* Generate Q in A */ +/* Workspace: need N*N [R] + N [tau] + N [work] */ +/* Workspace: prefer N*N [R] + N [tau] + N*NB [work] */ + + i__1 = *lwork - nwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work] */ +/* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work] */ + + i__1 = *lwork - nwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* WORK(IU) is N by N */ + + iu = nwork; + nwork = iu + *n * *n; + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in WORK(IU) and computing right */ +/* singular vectors of bidiagonal matrix in VT */ +/* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC */ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite WORK(IU) by left singular vectors of R */ +/* and VT by right singular vectors of R */ +/* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N [work] */ +/* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work] */ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], n, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IU), storing result in WORK(IR) and copying to A */ +/* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] */ +/* Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U] */ + + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = f2cmin(i__3,ldwrkr); + dgemm_("N", "N", &chunk, n, n, &c_b84, &a[i__ + a_dim1], + lda, &work[iu], n, &c_b63, &work[ir], &ldwrkr); + dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else if (wntqs) { + +/* Path 3 (M >> N, JOBZ='S') */ +/* N left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + + ir = 1; + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* Compute A=Q*R */ +/* Workspace: need N*N [R] + N [tau] + N [work] */ +/* Workspace: prefer N*N [R] + N [tau] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + dlaset_("L", &i__2, &i__1, &c_b63, &c_b63, &work[ir + 1], & + ldwrkr); + +/* Generate Q in A */ +/* Workspace: need N*N [R] + N [tau] + N [work] */ +/* Workspace: prefer N*N [R] + N [tau] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work] */ +/* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work] */ + + i__2 = *lwork - nwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagoal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* Workspace: need N*N [R] + 3*N [e, tauq, taup] + BDSPAC */ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of R and VT */ +/* by right singular vectors of R */ +/* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work] */ +/* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in U */ +/* Workspace: need N*N [R] */ + + dlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + dgemm_("N", "N", m, n, n, &c_b84, &a[a_offset], lda, &work[ir] + , &ldwrkr, &c_b63, &u[u_offset], ldu); + + } else if (wntqa) { + +/* Path 4 (M >> N, JOBZ='A') */ +/* M left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* Workspace: need N*N [U] + N [tau] + N [work] */ +/* Workspace: prefer N*N [U] + N [tau] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + +/* Generate Q in U */ +/* Workspace: need N*N [U] + N [tau] + M [work] */ +/* Workspace: prefer N*N [U] + N [tau] + M*NB [work] */ + i__2 = *lwork - nwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + &i__2, &ierr); + +/* Produce R in A, zeroing out other entries */ + + i__2 = *n - 1; + i__1 = *n - 1; + dlaset_("L", &i__2, &i__1, &c_b63, &c_b63, &a[a_dim1 + 2], + lda); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work] */ +/* Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work] */ + + i__2 = *lwork - nwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in WORK(IU) and computing right */ +/* singular vectors of bidiagonal matrix in VT */ +/* Workspace: need N*N [U] + 3*N [e, tauq, taup] + BDSPAC */ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite WORK(IU) by left singular vectors of R and VT */ +/* by right singular vectors of R */ +/* Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work] */ +/* Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IU), storing result in A */ +/* Workspace: need N*N [U] */ + + dgemm_("N", "N", m, n, n, &c_b84, &u[u_offset], ldu, &work[iu] + , &ldwrku, &c_b63, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + + } + + } else { + +/* M .LT. MNTHR */ + +/* Path 5 (M >= N, but not much larger) */ +/* Reduce to bidiagonal form without QR decomposition */ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize A */ +/* Workspace: need 3*N [e, tauq, taup] + M [work] */ +/* Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work] */ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* Path 5n (M >= N, JOBZ='N') */ +/* Perform bidiagonal SVD, only computing singular values */ +/* Workspace: need 3*N [e, tauq, taup] + BDSPAC */ + + dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { +/* Path 5o (M >= N, JOBZ='O') */ + iu = nwork; + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + nwork = iu + ldwrku * *n; + dlaset_("F", m, n, &c_b63, &c_b63, &work[iu], &ldwrku); +/* IR is unused; silence compile warnings */ + ir = -1; + } else { + +/* WORK( IU ) is N by N */ + + ldwrku = *n; + nwork = iu + ldwrku * *n; + +/* WORK(IR) is LDWRKR by N */ + + ir = nwork; + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in WORK(IU) and computing right */ +/* singular vectors of bidiagonal matrix in VT */ +/* Workspace: need 3*N [e, tauq, taup] + N*N [U] + BDSPAC */ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & + vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ + 1], info); + +/* Overwrite VT by right singular vectors of A */ +/* Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work] */ +/* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* Path 5o-fast */ +/* Overwrite WORK(IU) by left singular vectors of A */ +/* Workspace: need 3*N [e, tauq, taup] + M*N [U] + N [work] */ +/* Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + +/* Copy left singular vectors of A from WORK(IU) to A */ + + dlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { + +/* Path 5o-slow */ +/* Generate Q in A */ +/* Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work] */ +/* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__2, &ierr); + +/* Multiply Q in A by left singular vectors of */ +/* bidiagonal matrix in WORK(IU), storing result in */ +/* WORK(IR) and copying to A */ +/* Workspace: need 3*N [e, tauq, taup] + N*N [U] + NB*N [R] */ +/* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N [R] */ + + i__2 = *m; + i__1 = ldwrkr; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = f2cmin(i__3,ldwrkr); + dgemm_("N", "N", &chunk, n, n, &c_b84, &a[i__ + + a_dim1], lda, &work[iu], &ldwrku, &c_b63, & + work[ir], &ldwrkr); + dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L20: */ + } + } + + } else if (wntqs) { + +/* Path 5s (M >= N, JOBZ='S') */ +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* Workspace: need 3*N [e, tauq, taup] + BDSPAC */ + + dlaset_("F", m, n, &c_b63, &c_b63, &u[u_offset], ldu); + dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of A and VT */ +/* by right singular vectors of A */ +/* Workspace: need 3*N [e, tauq, taup] + N [work] */ +/* Workspace: prefer 3*N [e, tauq, taup] + N*NB [work] */ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* Path 5a (M >= N, JOBZ='A') */ +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* Workspace: need 3*N [e, tauq, taup] + BDSPAC */ + + dlaset_("F", m, m, &c_b63, &c_b63, &u[u_offset], ldu); + dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of U to identity matrix */ + + if (*m > *n) { + i__1 = *m - *n; + i__2 = *m - *n; + dlaset_("F", &i__1, &i__2, &c_b63, &c_b84, &u[*n + 1 + (* + n + 1) * u_dim1], ldu); + } + +/* Overwrite U by left singular vectors of A and VT */ +/* by right singular vectors of A */ +/* Workspace: need 3*N [e, tauq, taup] + M [work] */ +/* Workspace: prefer 3*N [e, tauq, taup] + M*NB [work] */ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } else { + +/* A has more columns than rows. If A has sufficiently more */ +/* columns than rows, first reduce using the LQ decomposition (if */ +/* sufficient workspace available) */ + + if (*n >= mnthr) { + + if (wntqn) { + +/* Path 1t (N >> M, JOBZ='N') */ +/* No singular vectors to be computed */ + + itau = 1; + nwork = itau + *m; + +/* Compute A=L*Q */ +/* Workspace: need M [tau] + M [work] */ +/* Workspace: prefer M [tau] + M*NB [work] */ + + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out above L */ + + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_("U", &i__1, &i__2, &c_b63, &c_b63, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in A */ +/* Workspace: need 3*M [e, tauq, taup] + M [work] */ +/* Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work] */ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *m; + +/* Perform bidiagonal SVD, computing singular values only */ +/* Workspace: need M [e] + BDSPAC */ + + dbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* Path 2t (N >> M, JOBZ='O') */ +/* M right singular vectors to be overwritten on A and */ +/* M left singular vectors to be computed in U */ + + ivt = 1; + +/* WORK(IVT) is M by M */ +/* WORK(IL) is M by M; it is later resized to M by chunk for gemm */ + + il = ivt + *m * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { + ldwrkl = *m; + chunk = *n; + } else { + ldwrkl = *m; + chunk = (*lwork - *m * *m) / *m; + } + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* Compute A=L*Q */ +/* Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work] */ +/* Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] */ + + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy L to WORK(IL), zeroing about above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_("U", &i__1, &i__2, &c_b63, &c_b63, &work[il + ldwrkl], + &ldwrkl); + +/* Generate Q in A */ +/* Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work] */ +/* Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] */ + + i__1 = *lwork - nwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in WORK(IL) */ +/* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work] */ +/* Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work] */ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U, and computing right singular */ +/* vectors of bidiagonal matrix in WORK(IVT) */ +/* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC */ + + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], m, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of L and WORK(IVT) */ +/* by right singular vectors of L */ +/* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work] */ +/* Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work] */ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); + +/* Multiply right singular vectors of L in WORK(IVT) by Q */ +/* in A, storing result in WORK(IL) and copying to A */ +/* Workspace: need M*M [VT] + M*M [L] */ +/* Workspace: prefer M*M [VT] + M*N [L] */ +/* At this point, L is resized as M by chunk. */ + + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = f2cmin(i__3,chunk); + dgemm_("N", "N", m, &blk, m, &c_b84, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b63, &work[il], & + ldwrkl); + dlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L30: */ + } + + } else if (wntqs) { + +/* Path 3t (N >> M, JOBZ='S') */ +/* M right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + + il = 1; + +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* Compute A=L*Q */ +/* Workspace: need M*M [L] + M [tau] + M [work] */ +/* Workspace: prefer M*M [L] + M [tau] + M*NB [work] */ + + i__2 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_("U", &i__2, &i__1, &c_b63, &c_b63, &work[il + ldwrkl], + &ldwrkl); + +/* Generate Q in A */ +/* Workspace: need M*M [L] + M [tau] + M [work] */ +/* Workspace: prefer M*M [L] + M [tau] + M*NB [work] */ + + i__2 = *lwork - nwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU). */ +/* Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work] */ +/* Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work] */ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* Workspace: need M*M [L] + 3*M [e, tauq, taup] + BDSPAC */ + + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of L and VT */ +/* by right singular vectors of L */ +/* Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work] */ +/* Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work] */ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* Multiply right singular vectors of L in WORK(IL) by */ +/* Q in A, storing result in VT */ +/* Workspace: need M*M [L] */ + + dlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); + dgemm_("N", "N", m, n, m, &c_b84, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b63, &vt[vt_offset], ldvt); + + } else if (wntqa) { + +/* Path 4t (N >> M, JOBZ='A') */ +/* N right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + + ivt = 1; + +/* WORK(IVT) is M by M */ + + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* Workspace: need M*M [VT] + M [tau] + M [work] */ +/* Workspace: prefer M*M [VT] + M [tau] + M*NB [work] */ + + i__2 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* Generate Q in VT */ +/* Workspace: need M*M [VT] + M [tau] + N [work] */ +/* Workspace: prefer M*M [VT] + M [tau] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__2, &ierr); + +/* Produce L in A, zeroing out other entries */ + + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_("U", &i__2, &i__1, &c_b63, &c_b63, &a[(a_dim1 << 1) + + 1], lda); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in A */ +/* Workspace: need M*M [VT] + 3*M [e, tauq, taup] + M [work] */ +/* Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work] */ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in WORK(IVT) */ +/* Workspace: need M*M [VT] + 3*M [e, tauq, taup] + BDSPAC */ + + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* Overwrite U by left singular vectors of L and WORK(IVT) */ +/* by right singular vectors of L */ +/* Workspace: need M*M [VT] + 3*M [e, tauq, taup]+ M [work] */ +/* Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work] */ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); + +/* Multiply right singular vectors of L in WORK(IVT) by */ +/* Q in VT, storing result in A */ +/* Workspace: need M*M [VT] */ + + dgemm_("N", "N", m, n, m, &c_b84, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b63, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + + } + + } else { + +/* N .LT. MNTHR */ + +/* Path 5t (N > M, but not much larger) */ +/* Reduce to bidiagonal form without LQ decomposition */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize A */ +/* Workspace: need 3*M [e, tauq, taup] + N [work] */ +/* Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work] */ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* Path 5tn (N > M, JOBZ='N') */ +/* Perform bidiagonal SVD, only computing singular values */ +/* Workspace: need 3*M [e, tauq, taup] + BDSPAC */ + + dbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { +/* Path 5to (N > M, JOBZ='O') */ + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* WORK( IVT ) is M by N */ + + dlaset_("F", m, n, &c_b63, &c_b63, &work[ivt], &ldwkvt); + nwork = ivt + ldwkvt * *n; +/* IL is unused; silence compile warnings */ + il = -1; + } else { + +/* WORK( IVT ) is M by M */ + + nwork = ivt + ldwkvt * *m; + il = nwork; + +/* WORK(IL) is M by CHUNK */ + + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in WORK(IVT) */ +/* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + BDSPAC */ + + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* Overwrite U by left singular vectors of A */ +/* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work] */ +/* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work] */ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* Path 5to-fast */ +/* Overwrite WORK(IVT) by left singular vectors of A */ +/* Workspace: need 3*M [e, tauq, taup] + M*N [VT] + M [work] */ +/* Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work] */ + + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + +/* Copy right singular vectors of A from WORK(IVT) to A */ + + dlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* Path 5to-slow */ +/* Generate P**T in A */ +/* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work] */ +/* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work] */ + + i__2 = *lwork - nwork + 1; + dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* Multiply Q in A by right singular vectors of */ +/* bidiagonal matrix in WORK(IVT), storing result in */ +/* WORK(IL) and copying to A */ +/* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M*NB [L] */ +/* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N [L] */ + + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = f2cmin(i__3,chunk); + dgemm_("N", "N", m, &blk, m, &c_b84, &work[ivt], & + ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b63, & + work[il], m); + dlacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } + } + } else if (wntqs) { + +/* Path 5ts (N > M, JOBZ='S') */ +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* Workspace: need 3*M [e, tauq, taup] + BDSPAC */ + + dlaset_("F", m, n, &c_b63, &c_b63, &vt[vt_offset], ldvt); + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of A and VT */ +/* by right singular vectors of A */ +/* Workspace: need 3*M [e, tauq, taup] + M [work] */ +/* Workspace: prefer 3*M [e, tauq, taup] + M*NB [work] */ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* Path 5ta (N > M, JOBZ='A') */ +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* Workspace: need 3*M [e, tauq, taup] + BDSPAC */ + + dlaset_("F", n, n, &c_b63, &c_b63, &vt[vt_offset], ldvt); + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of VT to identity matrix */ + + if (*n > *m) { + i__1 = *n - *m; + i__2 = *n - *m; + dlaset_("F", &i__1, &i__2, &c_b63, &c_b84, &vt[*m + 1 + (* + m + 1) * vt_dim1], ldvt); + } + +/* Overwrite U by left singular vectors of A and VT */ +/* by right singular vectors of A */ +/* Workspace: need 3*M [e, tauq, taup] + N [work] */ +/* Workspace: prefer 3*M [e, tauq, taup] + N*NB [work] */ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (anrm < smlnum) { + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (doublereal) maxwrk; + + return 0; + +/* End of DGESDD */ + +} /* dgesdd_ */ + diff --git a/lapack-netlib/SRC/dgesv.c b/lapack-netlib/SRC/dgesv.c new file mode 100644 index 000000000..30fdb4e15 --- /dev/null +++ b/lapack-netlib/SRC/dgesv.c @@ -0,0 +1,574 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGESV computes the solution to system of linear equations A * X = B for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGESV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */ + +/* INTEGER INFO, LDA, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGESV computes the solution to a real system of linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */ +/* > */ +/* > The LU decomposition with partial pivoting and row interchanges is */ +/* > used to factor A as */ +/* > A = P * L * U, */ +/* > where P is a permutation matrix, L is unit lower triangular, and U is */ +/* > upper triangular. The factored form of A is then used to solve the */ +/* > system of equations A * X = B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the N-by-N coefficient matrix A. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices that define the permutation matrix P; */ +/* > row i of the matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS matrix of right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, so the solution could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer + *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *, ftnlen), dgetrs_(char *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGESV ", &i__1, (ftnlen)6); + return 0; + } + +/* Compute the LU factorization of A. */ + + dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + dgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; + +/* End of DGESV */ + +} /* dgesv_ */ + diff --git a/lapack-netlib/SRC/dgesvd.c b/lapack-netlib/SRC/dgesvd.c new file mode 100644 index 000000000..597ad6dda --- /dev/null +++ b/lapack-netlib/SRC/dgesvd.c @@ -0,0 +1,4475 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGESVD computes the singular value decomposition (SVD) for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGESVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, */ +/* WORK, LWORK, INFO ) */ + +/* CHARACTER JOBU, JOBVT */ +/* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N */ +/* DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), */ +/* $ VT( LDVT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGESVD computes the singular value decomposition (SVD) of a real */ +/* > M-by-N matrix A, optionally computing the left and/or right singular */ +/* > vectors. The SVD is written */ +/* > */ +/* > A = U * SIGMA * transpose(V) */ +/* > */ +/* > where SIGMA is an M-by-N matrix which is zero except for its */ +/* > f2cmin(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */ +/* > V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA */ +/* > are the singular values of A; they are real and non-negative, and */ +/* > are returned in descending order. The first f2cmin(m,n) columns of */ +/* > U and V are the left and right singular vectors of A. */ +/* > */ +/* > Note that the routine returns V**T, not V. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix U: */ +/* > = 'A': all M columns of U are returned in array U: */ +/* > = 'S': the first f2cmin(m,n) columns of U (the left singular */ +/* > vectors) are returned in the array U; */ +/* > = 'O': the first f2cmin(m,n) columns of U (the left singular */ +/* > vectors) are overwritten on the array A; */ +/* > = 'N': no columns of U (no left singular vectors) are */ +/* > computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVT */ +/* > \verbatim */ +/* > JOBVT is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix */ +/* > V**T: */ +/* > = 'A': all N rows of V**T are returned in the array VT; */ +/* > = 'S': the first f2cmin(m,n) rows of V**T (the right singular */ +/* > vectors) are returned in the array VT; */ +/* > = 'O': the first f2cmin(m,n) rows of V**T (the right singular */ +/* > vectors) are overwritten on the array A; */ +/* > = 'N': no rows of V**T (no right singular vectors) are */ +/* > computed. */ +/* > */ +/* > JOBVT and JOBU cannot both be 'O'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if JOBU = 'O', A is overwritten with the first f2cmin(m,n) */ +/* > columns of U (the left singular vectors, */ +/* > stored columnwise); */ +/* > if JOBVT = 'O', A is overwritten with the first f2cmin(m,n) */ +/* > rows of V**T (the right singular vectors, */ +/* > stored rowwise); */ +/* > if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A */ +/* > are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The singular values of A, sorted so that S(i) >= S(i+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is DOUBLE PRECISION array, dimension (LDU,UCOL) */ +/* > (LDU,M) if JOBU = 'A' or (LDU,f2cmin(M,N)) if JOBU = 'S'. */ +/* > If JOBU = 'A', U contains the M-by-M orthogonal matrix U; */ +/* > if JOBU = 'S', U contains the first f2cmin(m,n) columns of U */ +/* > (the left singular vectors, stored columnwise); */ +/* > if JOBU = 'N' or 'O', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= 1; if */ +/* > JOBU = 'S' or 'A', LDU >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VT */ +/* > \verbatim */ +/* > VT is DOUBLE PRECISION array, dimension (LDVT,N) */ +/* > If JOBVT = 'A', VT contains the N-by-N orthogonal matrix */ +/* > V**T; */ +/* > if JOBVT = 'S', VT contains the first f2cmin(m,n) rows of */ +/* > V**T (the right singular vectors, stored rowwise); */ +/* > if JOBVT = 'N' or 'O', VT is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER */ +/* > The leading dimension of the array VT. LDVT >= 1; if */ +/* > JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= f2cmin(M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */ +/* > if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged */ +/* > superdiagonal elements of an upper bidiagonal matrix B */ +/* > whose diagonal is in S (not necessarily sorted). B */ +/* > satisfies A = U * B * VT, so it has the same singular values */ +/* > as A, and singular vectors related by U and VT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code): */ +/* > - PATH 1 (M much larger than N, JOBU='N') */ +/* > - PATH 1t (N much larger than M, JOBVT='N') */ +/* > LWORK >= MAX(1,3*MIN(M,N) + MAX(M,N),5*MIN(M,N)) for the other paths */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if DBDSQR did not converge, INFO specifies how many */ +/* > superdiagonals of an intermediate bidiagonal form B */ +/* > did not converge to zero. See the description of WORK */ +/* > above for details. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup doubleGEsing */ + +/* ===================================================================== */ +/* Subroutine */ int dgesvd_(char *jobu, char *jobvt, integer *m, integer *n, + doublereal *a, integer *lda, doublereal *s, doublereal *u, integer * + ldu, doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], + i__2, i__3, i__4; + char ch__1[2]; + + /* Local variables */ + integer iscl; + doublereal anrm; + integer ierr, itau, ncvt, nrvt, lwork_dgebrd__, lwork_dgelqf__, + lwork_dgeqrf__, i__; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + integer chunk, minmn, wrkbl, itaup, itauq, mnthr, iwork; + logical wntua, wntva, wntun, wntuo, wntvn, wntvo, wntus, wntvs; + integer ie; + extern /* Subroutine */ int dgebrd_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + integer ir, bdspac, iu; + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), dlacpy_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + dbdsqr_(char *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *), dorgbr_(char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + doublereal bignum; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), dorglq_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + integer ldwrkr, minwrk, ldwrku, maxwrk; + doublereal smlnum; + logical lquery, wntuas, wntvas; + integer lwork_dorgbr_p__, lwork_dorgbr_q__, lwork_dorglq_m__, + lwork_dorglq_n__, lwork_dorgqr_m__, lwork_dorgqr_n__, blk, ncu; + doublereal dum[1], eps; + integer nru; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1 * 1; + vt -= vt_offset; + --work; + + /* Function Body */ + *info = 0; + minmn = f2cmin(*m,*n); + wntua = lsame_(jobu, "A"); + wntus = lsame_(jobu, "S"); + wntuas = wntua || wntus; + wntuo = lsame_(jobu, "O"); + wntun = lsame_(jobu, "N"); + wntva = lsame_(jobvt, "A"); + wntvs = lsame_(jobvt, "S"); + wntvas = wntva || wntvs; + wntvo = lsame_(jobvt, "O"); + wntvn = lsame_(jobvt, "N"); + lquery = *lwork == -1; + + if (! (wntua || wntus || wntuo || wntun)) { + *info = -1; + } else if (! (wntva || wntvs || wntvo || wntvn) || wntvo && wntuo) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else if (*ldu < 1 || wntuas && *ldu < *m) { + *info = -9; + } else if (*ldvt < 1 || wntva && *ldvt < *n || wntvs && *ldvt < minmn) { + *info = -11; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (*m >= *n && minmn > 0) { + +/* Compute space needed for DBDSQR */ + +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = ilaenv_(&c__6, "DGESVD", ch__1, m, n, &c__0, &c__0, ( + ftnlen)6, (ftnlen)2); + bdspac = *n * 5; +/* Compute space needed for DGEQRF */ + dgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_dgeqrf__ = (integer) dum[0]; +/* Compute space needed for DORGQR */ + dorgqr_(m, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_dorgqr_n__ = (integer) dum[0]; + dorgqr_(m, m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_dorgqr_m__ = (integer) dum[0]; +/* Compute space needed for DGEBRD */ + dgebrd_(n, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, + &ierr); + lwork_dgebrd__ = (integer) dum[0]; +/* Compute space needed for DORGBR P */ + dorgbr_("P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_dorgbr_p__ = (integer) dum[0]; +/* Compute space needed for DORGBR Q */ + dorgbr_("Q", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_dorgbr_q__ = (integer) dum[0]; + + if (*m >= mnthr) { + if (wntun) { + +/* Path 1 (M much larger than N, JOBU='N') */ + + maxwrk = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_dgebrd__; + maxwrk = f2cmax(i__2,i__3); + if (wntvo || wntvas) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_dorgbr_p__; + maxwrk = f2cmax(i__2,i__3); + } + maxwrk = f2cmax(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *n << 2; + minwrk = f2cmax(i__2,bdspac); + } else if (wntuo && wntvn) { + +/* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n; + maxwrk = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } else if (wntuo && wntvas) { + +/* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n; + maxwrk = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } else if (wntus && wntvn) { + +/* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } else if (wntus && wntvo) { + +/* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = (*n << 1) * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } else if (wntus && wntvas) { + +/* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } else if (wntua && wntvn) { + +/* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } else if (wntua && wntvo) { + +/* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = (*n << 1) * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } else if (wntua && wntvas) { + +/* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } + } else { + +/* Path 10 (M at least N, but not much larger) */ + + dgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, & + c_n1, &ierr); + lwork_dgebrd__ = (integer) dum[0]; + maxwrk = *n * 3 + lwork_dgebrd__; + if (wntus || wntuo) { + dorgbr_("Q", m, n, n, &a[a_offset], lda, dum, dum, &c_n1, + &ierr); + lwork_dorgbr_q__ = (integer) dum[0]; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_dorgbr_q__; + maxwrk = f2cmax(i__2,i__3); + } + if (wntua) { + dorgbr_("Q", m, m, n, &a[a_offset], lda, dum, dum, &c_n1, + &ierr); + lwork_dorgbr_q__ = (integer) dum[0]; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_dorgbr_q__; + maxwrk = f2cmax(i__2,i__3); + } + if (! wntvn) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_dorgbr_p__; + maxwrk = f2cmax(i__2,i__3); + } + maxwrk = f2cmax(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } + } else if (minmn > 0) { + +/* Compute space needed for DBDSQR */ + +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = ilaenv_(&c__6, "DGESVD", ch__1, m, n, &c__0, &c__0, ( + ftnlen)6, (ftnlen)2); + bdspac = *m * 5; +/* Compute space needed for DGELQF */ + dgelqf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_dgelqf__ = (integer) dum[0]; +/* Compute space needed for DORGLQ */ + dorglq_(n, n, m, dum, n, dum, dum, &c_n1, &ierr); + lwork_dorglq_n__ = (integer) dum[0]; + dorglq_(m, n, m, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_dorglq_m__ = (integer) dum[0]; +/* Compute space needed for DGEBRD */ + dgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, + &ierr); + lwork_dgebrd__ = (integer) dum[0]; +/* Compute space needed for DORGBR P */ + dorgbr_("P", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr); + lwork_dorgbr_p__ = (integer) dum[0]; +/* Compute space needed for DORGBR Q */ + dorgbr_("Q", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr); + lwork_dorgbr_q__ = (integer) dum[0]; + if (*n >= mnthr) { + if (wntvn) { + +/* Path 1t(N much larger than M, JOBVT='N') */ + + maxwrk = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_dgebrd__; + maxwrk = f2cmax(i__2,i__3); + if (wntuo || wntuas) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_dorgbr_q__; + maxwrk = f2cmax(i__2,i__3); + } + maxwrk = f2cmax(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *m << 2; + minwrk = f2cmax(i__2,bdspac); + } else if (wntvo && wntun) { + +/* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m; + maxwrk = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } else if (wntvo && wntuas) { + +/* Path 3t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='O') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m; + maxwrk = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } else if (wntvs && wntun) { + +/* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } else if (wntvs && wntuo) { + +/* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = (*m << 1) * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } else if (wntvs && wntuas) { + +/* Path 6t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='S') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } else if (wntva && wntun) { + +/* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } else if (wntva && wntuo) { + +/* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = (*m << 1) * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } else if (wntva && wntuas) { + +/* Path 9t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='A') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } + } else { + +/* Path 10t(N greater than M, but not much larger) */ + + dgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, & + c_n1, &ierr); + lwork_dgebrd__ = (integer) dum[0]; + maxwrk = *m * 3 + lwork_dgebrd__; + if (wntvs || wntvo) { +/* Compute space needed for DORGBR P */ + dorgbr_("P", m, n, m, &a[a_offset], n, dum, dum, &c_n1, & + ierr); + lwork_dorgbr_p__ = (integer) dum[0]; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_dorgbr_p__; + maxwrk = f2cmax(i__2,i__3); + } + if (wntva) { + dorgbr_("P", n, n, m, &a[a_offset], n, dum, dum, &c_n1, & + ierr); + lwork_dorgbr_p__ = (integer) dum[0]; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_dorgbr_p__; + maxwrk = f2cmax(i__2,i__3); + } + if (! wntun) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_dorgbr_q__; + maxwrk = f2cmax(i__2,i__3); + } + maxwrk = f2cmax(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } + } + maxwrk = f2cmax(maxwrk,minwrk); + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("DGESVD", &i__2, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("P"); + smlnum = sqrt(dlamch_("S")) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* A has at least as many rows as columns. If A has sufficiently */ +/* more rows than columns, first reduce using the QR */ +/* decomposition (if sufficient workspace available) */ + + if (*m >= mnthr) { + + if (wntun) { + +/* Path 1 (M much larger than N, JOBU='N') */ +/* No left singular vectors to be computed */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], & + i__2, &ierr); + +/* Zero out below R */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[a_dim1 + 2], + lda); + } + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + ncvt = 0; + if (wntvo || wntvas) { + +/* If right singular vectors desired, generate P'. */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], & + work[iwork], &i__2, &ierr); + ncvt = *n; + } + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in A if desired */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", n, &ncvt, &c__0, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, dum, &c__1, dum, &c__1, &work[iwork], + info); + +/* If right singular vectors desired in VT, copy them there */ + + if (wntvas) { + dlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + } + + } else if (wntuo && wntvn) { + +/* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */ +/* N left singular vectors to be overwritten on A and */ +/* no right singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *n; + if (*lwork >= f2cmax(i__2,i__3) + *lda * *n) { + +/* WORK(IU) is LDA by N, WORK(IR) is LDA by N */ + + ldwrku = *lda; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *n; + if (*lwork >= f2cmax(i__2,i__3) + *n * *n) { + +/* WORK(IU) is LDA by N, WORK(IR) is N by N */ + + ldwrku = *lda; + ldwrkr = *n; + } else { + +/* WORK(IU) is LDWRKU by N, WORK(IR) is N by N */ + + ldwrku = (*lwork - *n * *n - *n) / *n; + ldwrkr = *n; + } + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy R to WORK(IR) and zero out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 1], + &ldwrkr); + +/* Generate Q in A */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + +/* Generate left vectors bidiagonalizing R */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], & + work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (Workspace: need N*N + BDSPAC) */ + + dbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], dum, & + c__1, &work[ir], &ldwrkr, dum, &c__1, &work[iwork] + , info); + iu = ie + *n; + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in WORK(IU) and copying to A */ +/* (Workspace: need N*N + 2*N, prefer N*N + M*N + N) */ + + i__2 = *m; + i__3 = ldwrku; + for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__3) { +/* Computing MIN */ + i__4 = *m - i__ + 1; + chunk = f2cmin(i__4,ldwrku); + dgemm_("N", "N", &chunk, n, n, &c_b79, &a[i__ + + a_dim1], lda, &work[ir], &ldwrkr, &c_b57, & + work[iu], &ldwrku); + dlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize A */ +/* (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB) */ + + i__3 = *lwork - iwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + +/* Generate left vectors bidiagonalizing A */ +/* (Workspace: need 4*N, prefer 3*N + N*NB) */ + + i__3 = *lwork - iwork + 1; + dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[iwork], &i__3, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], dum, & + c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], + info); + + } + + } else if (wntuo && wntvas) { + +/* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') */ +/* N left singular vectors to be overwritten on A and */ +/* N right singular vectors to be computed in VT */ + +/* Computing MAX */ + i__3 = *n << 2; + if (*lwork >= *n * *n + f2cmax(i__3,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *n; + if (*lwork >= f2cmax(i__3,i__2) + *lda * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *n; + if (*lwork >= f2cmax(i__3,i__2) + *n * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ldwrkr = *n; + } else { + +/* WORK(IU) is LDWRKU by N and WORK(IR) is N by N */ + + ldwrku = (*lwork - *n * *n - *n) / *n; + ldwrkr = *n; + } + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__3 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__3, &ierr); + +/* Copy R to VT, zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__3 = *n - 1; + i__2 = *n - 1; + dlaset_("L", &i__3, &i__2, &c_b57, &c_b57, &vt[ + vt_dim1 + 2], ldvt); + } + +/* Generate Q in A */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__3 = *lwork - iwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__3, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT, copying result to WORK(IR) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */ + + i__3 = *lwork - iwork + 1; + dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], &i__3, & + ierr); + dlacpy_("L", n, n, &vt[vt_offset], ldvt, &work[ir], & + ldwrkr); + +/* Generate left vectors bidiagonalizing R in WORK(IR) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */ + + i__3 = *lwork - iwork + 1; + dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], & + work[iwork], &i__3, &ierr); + +/* Generate right vectors bidiagonalizing R in VT */ +/* (Workspace: need N*N + 4*N-1, prefer N*N + 3*N + (N-1)*NB) */ + + i__3 = *lwork - iwork + 1; + dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__3, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) and computing right */ +/* singular vectors of R in VT */ +/* (Workspace: need N*N + BDSPAC) */ + + dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &work[ir], &ldwrkr, dum, &c__1, + &work[iwork], info); + iu = ie + *n; + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in WORK(IU) and copying to A */ +/* (Workspace: need N*N + 2*N, prefer N*N + M*N + N) */ + + i__3 = *m; + i__2 = ldwrku; + for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ += + i__2) { +/* Computing MIN */ + i__4 = *m - i__ + 1; + chunk = f2cmin(i__4,ldwrku); + dgemm_("N", "N", &chunk, n, n, &c_b79, &a[i__ + + a_dim1], lda, &work[ir], &ldwrkr, &c_b57, & + work[iu], &ldwrku); + dlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L20: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy R to VT, zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[ + vt_dim1 + 2], ldvt); + } + +/* Generate Q in A */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], &i__2, & + ierr); + +/* Multiply Q in A by left vectors bidiagonalizing R */ +/* (Workspace: need 3*N + M, prefer 3*N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, & + work[itauq], &a[a_offset], lda, &work[iwork], & + i__2, &ierr); + +/* Generate right vectors bidiagonalizing R in VT */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntus) { + + if (wntvn) { + +/* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */ +/* N left singular vectors to be computed in U and */ +/* no right singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IR) is LDA by N */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + 1], &ldwrkr); + +/* Generate Q in A */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate left vectors bidiagonalizing R in WORK(IR) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (Workspace: need N*N + BDSPAC) */ + + dbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], + dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, & + work[iwork], info); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in U */ +/* (Workspace: need N*N) */ + + dgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, & + work[ir], &ldwrkr, &c_b57, &u[u_offset], ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[ + a_dim1 + 2], lda); + } + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left vectors bidiagonalizing R */ +/* (Workspace: need 3*N + M, prefer 3*N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], + dum, &c__1, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntvo) { + +/* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */ +/* N left singular vectors to be computed in U and */ +/* N right singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *n) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } else { + +/* WORK(IU) is N by N and WORK(IR) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + 1], &ldwrku); + +/* Generate Q in A */ +/* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*N*N + 4*N, */ +/* prefer 2*N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*N*N + 4*N-1, */ +/* prefer 2*N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in WORK(IR) */ +/* (Workspace: need 2*N*N + BDSPAC) */ + + dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[ + ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, + &work[iwork], info); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IU), storing result in U */ +/* (Workspace: need N*N) */ + + dgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, & + work[iu], &ldwrku, &c_b57, &u[u_offset], ldu); + +/* Copy right singular vectors of R to A */ +/* (Workspace: need N*N) */ + + dlacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[ + a_dim1 + 2], lda); + } + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left vectors bidiagonalizing R */ +/* (Workspace: need 3*N + M, prefer 3*N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + +/* Generate right vectors bidiagonalizing R in A */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], + &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, + &work[iwork], info); + + } + + } else if (wntvas) { + +/* Path 6 (M much larger than N, JOBU='S', JOBVT='S' */ +/* or 'A') */ +/* N left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is N by N */ + + ldwrku = *n; + } + itau = iu + ldwrku * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + 1], &ldwrku); + +/* Generate Q in A */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to VT */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset], + ldvt); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need N*N + 4*N-1, */ +/* prefer N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in VT */ +/* (Workspace: need N*N + BDSPAC) */ + + dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &work[iu], &ldwrku, dum, & + c__1, &work[iwork], info); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IU), storing result in U */ +/* (Workspace: need N*N) */ + + dgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, & + work[iu], &ldwrku, &c_b57, &u[u_offset], ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to VT, zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[ + vt_dim1 + 2], ldvt); + } + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], + &work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in VT */ +/* (Workspace: need 3*N + M, prefer 3*N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, + &work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info); + + } + + } + + } else if (wntua) { + + if (wntvn) { + +/* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */ +/* M left singular vectors to be computed in U and */ +/* no right singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3); + if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IR) is LDA by N */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Copy R to WORK(IR), zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + 1], &ldwrkr); + +/* Generate Q in U */ +/* (Workspace: need N*N + N + M, prefer N*N + N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (Workspace: need N*N + BDSPAC) */ + + dbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], + dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, & + work[iwork], info); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IR), storing result in A */ +/* (Workspace: need N*N) */ + + dgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, & + work[ir], &ldwrkr, &c_b57, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need N + M, prefer N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[ + a_dim1 + 2], lda); + } + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in A */ +/* (Workspace: need 3*N + M, prefer 3*N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], + dum, &c__1, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntvo) { + +/* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */ +/* M left singular vectors to be computed in U and */ +/* N right singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3); + if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *n) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } else { + +/* WORK(IU) is N by N and WORK(IR) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need 2*N*N + N + M, prefer 2*N*N + N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + 1], &ldwrku); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*N*N + 4*N, */ +/* prefer 2*N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*N*N + 4*N-1, */ +/* prefer 2*N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in WORK(IR) */ +/* (Workspace: need 2*N*N + BDSPAC) */ + + dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[ + ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, + &work[iwork], info); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IU), storing result in A */ +/* (Workspace: need N*N) */ + + dgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, & + work[iu], &ldwrku, &c_b57, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Copy right singular vectors of R from WORK(IR) to A */ + + dlacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need N + M, prefer N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[ + a_dim1 + 2], lda); + } + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in A */ +/* (Workspace: need 3*N + M, prefer 3*N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + +/* Generate right bidiagonalizing vectors in A */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], + &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, + &work[iwork], info); + + } + + } else if (wntvas) { + +/* Path 9 (M much larger than N, JOBU='A', JOBVT='S' */ +/* or 'A') */ +/* M left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3); + if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is N by N */ + + ldwrku = *n; + } + itau = iu + ldwrku * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need N*N + N + M, prefer N*N + N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + 1], &ldwrku); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to VT */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset], + ldvt); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need N*N + 4*N-1, */ +/* prefer N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in VT */ +/* (Workspace: need N*N + BDSPAC) */ + + dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &work[iu], &ldwrku, dum, & + c__1, &work[iwork], info); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IU), storing result in A */ +/* (Workspace: need N*N) */ + + dgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, & + work[iu], &ldwrku, &c_b57, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need N + M, prefer N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R from A to VT, zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[ + vt_dim1 + 2], ldvt); + } + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], + &work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in VT */ +/* (Workspace: need 3*N + M, prefer 3*N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, + &work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info); + + } + + } + + } + + } else { + +/* M .LT. MNTHR */ + +/* Path 10 (M at least N, but not much larger) */ +/* Reduce to bidiagonal form without QR decomposition */ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize A */ +/* (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__2, &ierr); + if (wntuas) { + +/* If left singular vectors desired in U, copy result to U */ +/* and generate left bidiagonalizing vectors in U */ +/* (Workspace: need 3*N + NCU, prefer 3*N + NCU*NB) */ + + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + if (wntus) { + ncu = *n; + } + if (wntua) { + ncu = *m; + } + i__2 = *lwork - iwork + 1; + dorgbr_("Q", m, &ncu, n, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__2, &ierr); + } + if (wntvas) { + +/* If right singular vectors desired in VT, copy result to */ +/* VT and generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - iwork + 1; + dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[iwork], &i__2, &ierr); + } + if (wntuo) { + +/* If left singular vectors desired in A, generate left */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*N, prefer 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ + iwork], &i__2, &ierr); + } + if (wntvo) { + +/* If right singular vectors desired in A, generate right */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[ + iwork], &i__2, &ierr); + } + iwork = ie + *n; + if (wntuas || wntuo) { + nru = *m; + } + if (wntun) { + nru = 0; + } + if (wntvas || wntvo) { + ncvt = *n; + } + if (wntvn) { + ncvt = 0; + } + if (! wntuo && ! wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + } else if (! wntuo && wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[ + iwork], info); + } else { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in A and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & + work[iwork], info); + } + + } + + } else { + +/* A has more columns than rows. If A has sufficiently more */ +/* columns than rows, first reduce using the LQ decomposition (if */ +/* sufficient workspace available) */ + + if (*n >= mnthr) { + + if (wntvn) { + +/* Path 1t(N much larger than M, JOBVT='N') */ +/* No right singular vectors to be computed */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], & + i__2, &ierr); + +/* Zero out above L */ + + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + if (wntuo || wntuas) { + +/* If left singular vectors desired, generate Q */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], & + work[iwork], &i__2, &ierr); + } + iwork = ie + *m; + nru = 0; + if (wntuo || wntuas) { + nru = *m; + } + +/* Perform bidiagonal QR iteration, computing left singular */ +/* vectors of A in A if desired */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", m, &c__0, &nru, &c__0, &s[1], &work[ie], dum, & + c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], + info); + +/* If left singular vectors desired in U, copy them there */ + + if (wntuas) { + dlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); + } + + } else if (wntvo && wntun) { + +/* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */ +/* M right singular vectors to be overwritten on A and */ +/* no left singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *m; + if (*lwork >= f2cmax(i__2,i__3) + *lda * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *m; + if (*lwork >= f2cmax(i__2,i__3) + *m * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is M by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by CHUNK and WORK(IR) is M by M */ + + ldwrku = *m; + chunk = (*lwork - *m * *m - *m) / *m; + ldwrkr = *m; + } + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy L to WORK(IR) and zero out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + ldwrkr], &ldwrkr); + +/* Generate Q in A */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + +/* Generate right vectors bidiagonalizing L */ +/* (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], & + work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M + BDSPAC) */ + + dbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &work[ + ir], &ldwrkr, dum, &c__1, dum, &c__1, &work[iwork] + , info); + iu = ie + *m; + +/* Multiply right singular vectors of L in WORK(IR) by Q */ +/* in A, storing result in WORK(IU) and copying to A */ +/* (Workspace: need M*M + 2*M, prefer M*M + M*N + M) */ + + i__2 = *n; + i__3 = chunk; + for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__3) { +/* Computing MIN */ + i__4 = *n - i__ + 1; + blk = f2cmin(i__4,chunk); + dgemm_("N", "N", m, &blk, m, &c_b79, &work[ir], & + ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b57, & + work[iu], &ldwrku); + dlacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * + a_dim1 + 1], lda); +/* L30: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize A */ +/* (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB) */ + + i__3 = *lwork - iwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + +/* Generate right vectors bidiagonalizing A */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__3 = *lwork - iwork + 1; + dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[iwork], &i__3, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("L", m, n, &c__0, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, dum, &c__1, dum, &c__1, &work[ + iwork], info); + + } + + } else if (wntvo && wntuas) { + +/* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') */ +/* M right singular vectors to be overwritten on A and */ +/* M left singular vectors to be computed in U */ + +/* Computing MAX */ + i__3 = *m << 2; + if (*lwork >= *m * *m + f2cmax(i__3,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *m; + if (*lwork >= f2cmax(i__3,i__2) + *lda * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *m; + if (*lwork >= f2cmax(i__3,i__2) + *m * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is M by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by CHUNK and WORK(IR) is M by M */ + + ldwrku = *m; + chunk = (*lwork - *m * *m - *m) / *m; + ldwrkr = *m; + } + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__3 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__3, &ierr); + +/* Copy L to U, zeroing about above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__3 = *m - 1; + i__2 = *m - 1; + dlaset_("U", &i__3, &i__2, &c_b57, &c_b57, &u[(u_dim1 << + 1) + 1], ldu); + +/* Generate Q in A */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__3 = *lwork - iwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__3, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U, copying result to WORK(IR) */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */ + + i__3 = *lwork - iwork + 1; + dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + dlacpy_("U", m, m, &u[u_offset], ldu, &work[ir], &ldwrkr); + +/* Generate right vectors bidiagonalizing L in WORK(IR) */ +/* (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB) */ + + i__3 = *lwork - iwork + 1; + dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], & + work[iwork], &i__3, &ierr); + +/* Generate left vectors bidiagonalizing L in U */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB) */ + + i__3 = *lwork - iwork + 1; + dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__3, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U, and computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M + BDSPAC) */ + + dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ir], + &ldwrkr, &u[u_offset], ldu, dum, &c__1, &work[ + iwork], info); + iu = ie + *m; + +/* Multiply right singular vectors of L in WORK(IR) by Q */ +/* in A, storing result in WORK(IU) and copying to A */ +/* (Workspace: need M*M + 2*M, prefer M*M + M*N + M)) */ + + i__3 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ += + i__2) { +/* Computing MIN */ + i__4 = *n - i__ + 1; + blk = f2cmin(i__4,chunk); + dgemm_("N", "N", m, &blk, m, &c_b79, &work[ir], & + ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b57, & + work[iu], &ldwrku); + dlacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * + a_dim1 + 1], lda); +/* L40: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 << + 1) + 1], ldu); + +/* Generate Q in A */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in A */ +/* (Workspace: need 3*M + N, prefer 3*M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &work[ + itaup], &a[a_offset], lda, &work[iwork], &i__2, & + ierr); + +/* Generate left vectors bidiagonalizing L in U */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntvs) { + + if (wntun) { + +/* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* no left singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IR) is LDA by M */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is M by M */ + + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IR), zeroing out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + ldwrkr], &ldwrkr); + +/* Generate Q in A */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate right vectors bidiagonalizing L in */ +/* WORK(IR) */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M + BDSPAC) */ + + dbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], & + work[ir], &ldwrkr, dum, &c__1, dum, &c__1, & + work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IR) by */ +/* Q in A, storing result in VT */ +/* (Workspace: need M*M) */ + + dgemm_("N", "N", m, n, m, &c_b79, &work[ir], &ldwrkr, + &a[a_offset], lda, &c_b57, &vt[vt_offset], + ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy result to VT */ + + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 + << 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in VT */ +/* (Workspace: need 3*M + N, prefer 3*M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], & + vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntuo) { + +/* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* M left singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is LDA by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *m) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is M by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by M and WORK(IR) is M by M */ + + ldwrku = *m; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out below it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + ldwrku], &ldwrku); + +/* Generate Q in A */ +/* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*M*M + 4*M, */ +/* prefer 2*M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*M*M + 4*M-1, */ +/* prefer 2*M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in WORK(IR) and computing */ +/* right singular vectors of L in WORK(IU) */ +/* (Workspace: need 2*M*M + BDSPAC) */ + + dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, + &work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in A, storing result in VT */ +/* (Workspace: need M*M) */ + + dgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, + &a[a_offset], lda, &c_b57, &vt[vt_offset], + ldvt); + +/* Copy left singular vectors of L to A */ +/* (Workspace: need M*M) */ + + dlacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 + << 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in VT */ +/* (Workspace: need 3*M + N, prefer 3*M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors of L in A */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, compute left */ +/* singular vectors of A in A and compute right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, & + c__1, &work[iwork], info); + + } + + } else if (wntuas) { + +/* Path 6t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is LDA by M */ + + ldwrku = *m; + } + itau = iu + ldwrku * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + ldwrku], &ldwrku); + +/* Generate Q in A */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to U */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], + ldu); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need M*M + 4*M-1, */ +/* prefer M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U and computing right */ +/* singular vectors of L in WORK(IU) */ +/* (Workspace: need M*M + BDSPAC) */ + + dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in A, storing result in VT */ +/* (Workspace: need M*M) */ + + dgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, + &a[a_offset], lda, &c_b57, &vt[vt_offset], + ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], + ldu); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 + << 1) + 1], ldu); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in U by Q */ +/* in VT */ +/* (Workspace: need 3*M + N, prefer 3*M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info); + + } + + } + + } else if (wntva) { + + if (wntun) { + +/* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* no left singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3); + if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IR) is LDA by M */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is M by M */ + + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Copy L to WORK(IR), zeroing out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + ldwrkr], &ldwrkr); + +/* Generate Q in VT */ +/* (Workspace: need M*M + M + N, prefer M*M + M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need M*M + 4*M-1, */ +/* prefer M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M + BDSPAC) */ + + dbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], & + work[ir], &ldwrkr, dum, &c__1, dum, &c__1, & + work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IR) by */ +/* Q in VT, storing result in A */ +/* (Workspace: need M*M) */ + + dgemm_("N", "N", m, n, m, &c_b79, &work[ir], &ldwrkr, + &vt[vt_offset], ldvt, &c_b57, &a[a_offset], + lda); + +/* Copy right singular vectors of A from A to VT */ + + dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need M + N, prefer M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 + << 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in A by Q */ +/* in VT */ +/* (Workspace: need 3*M + N, prefer 3*M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], & + vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntuo) { + +/* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* M left singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3); + if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is LDA by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *m) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is M by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by M and WORK(IR) is M by M */ + + ldwrku = *m; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need 2*M*M + M + N, prefer 2*M*M + M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + ldwrku], &ldwrku); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*M*M + 4*M, */ +/* prefer 2*M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*M*M + 4*M-1, */ +/* prefer 2*M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in WORK(IR) and computing */ +/* right singular vectors of L in WORK(IU) */ +/* (Workspace: need 2*M*M + BDSPAC) */ + + dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, + &work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in VT, storing result in A */ +/* (Workspace: need M*M) */ + + dgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, + &vt[vt_offset], ldvt, &c_b57, &a[a_offset], + lda); + +/* Copy right singular vectors of A from A to VT */ + + dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Copy left singular vectors of A from WORK(IR) to A */ + + dlacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need M + N, prefer M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 + << 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in A by Q */ +/* in VT */ +/* (Workspace: need 3*M + N, prefer 3*M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in A */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, & + c__1, &work[iwork], info); + + } + + } else if (wntuas) { + +/* Path 9t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3); + if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IU) is LDA by M */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is M by M */ + + ldwrku = *m; + } + itau = iu + ldwrku * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need M*M + M + N, prefer M*M + M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + ldwrku], &ldwrku); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to U */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], + ldu); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U and computing right */ +/* singular vectors of L in WORK(IU) */ +/* (Workspace: need M*M + BDSPAC) */ + + dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in VT, storing result in A */ +/* (Workspace: need M*M) */ + + dgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, + &vt[vt_offset], ldvt, &c_b57, &a[a_offset], + lda); + +/* Copy right singular vectors of A from A to VT */ + + dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need M + N, prefer M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], + ldu); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 + << 1) + 1], ldu); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in U by Q */ +/* in VT */ +/* (Workspace: need 3*M + N, prefer 3*M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info); + + } + + } + + } + + } else { + +/* N .LT. MNTHR */ + +/* Path 10t(N greater than M, but not much larger) */ +/* Reduce to bidiagonal form without LQ decomposition */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize A */ +/* (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__2, &ierr); + if (wntuas) { + +/* If left singular vectors desired in U, copy result to U */ +/* and generate left bidiagonalizing vectors in U */ +/* (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB) */ + + dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - iwork + 1; + dorgbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + iwork], &i__2, &ierr); + } + if (wntvas) { + +/* If right singular vectors desired in VT, copy result to */ +/* VT and generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 3*M + NRVT, prefer 3*M + NRVT*NB) */ + + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + if (wntva) { + nrvt = *n; + } + if (wntvs) { + nrvt = *m; + } + i__2 = *lwork - iwork + 1; + dorgbr_("P", &nrvt, n, m, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__2, &ierr); + } + if (wntuo) { + +/* If left singular vectors desired in A, generate left */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("Q", m, m, n, &a[a_offset], lda, &work[itauq], &work[ + iwork], &i__2, &ierr); + } + if (wntvo) { + +/* If right singular vectors desired in A, generate right */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + iwork], &i__2, &ierr); + } + iwork = ie + *m; + if (wntuas || wntuo) { + nru = *m; + } + if (wntun) { + nru = 0; + } + if (wntvas || wntvo) { + ncvt = *n; + } + if (wntvn) { + ncvt = 0; + } + if (! wntuo && ! wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + } else if (! wntuo && wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[ + iwork], info); + } else { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in A and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & + work[iwork], info); + } + + } + + } + +/* If DBDSQR failed to converge, copy unconverged superdiagonals */ +/* to WORK( 2:MINMN ) */ + + if (*info != 0) { + if (ie > 2) { + i__2 = minmn - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__ + 1] = work[i__ + ie - 1]; +/* L50: */ + } + } + if (ie < 2) { + for (i__ = minmn - 1; i__ >= 1; --i__) { + work[i__ + 1] = work[i__ + ie - 1]; +/* L60: */ + } + } + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (*info != 0 && anrm > bignum) { + i__2 = minmn - 1; + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &i__2, &c__1, &work[2], + &minmn, &ierr); + } + if (anrm < smlnum) { + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (*info != 0 && anrm < smlnum) { + i__2 = minmn - 1; + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__2, &c__1, &work[2], + &minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (doublereal) maxwrk; + + return 0; + +/* End of DGESVD */ + +} /* dgesvd_ */ + diff --git a/lapack-netlib/SRC/dgesvdq.c b/lapack-netlib/SRC/dgesvdq.c new file mode 100644 index 000000000..f85b915d3 --- /dev/null +++ b/lapack-netlib/SRC/dgesvdq.c @@ -0,0 +1,2128 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGESVDQ computes the singular value decomposition (SVD) with a QR-Preconditioned QR SVD Method + for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGESVDQ + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, */ +/* S, U, LDU, V, LDV, NUMRANK, IWORK, LIWORK, */ +/* WORK, LWORK, RWORK, LRWORK, INFO ) */ + +/* IMPLICIT NONE */ +/* CHARACTER JOBA, JOBP, JOBR, JOBU, JOBV */ +/* INTEGER M, N, LDA, LDU, LDV, NUMRANK, LIWORK, LWORK, LRWORK, */ +/* INFO */ +/* DOUBLE PRECISION A( LDA, * ), U( LDU, * ), V( LDV, * ), WORK( * ) */ +/* DOUBLE PRECISION S( * ), RWORK( * ) */ +/* INTEGER IWORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGESVDQ computes the singular value decomposition (SVD) of a real */ +/* > M-by-N matrix A, where M >= N. The SVD of A is written as */ +/* > [++] [xx] [x0] [xx] */ +/* > A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] */ +/* > [++] [xx] */ +/* > where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal */ +/* > matrix, and V is an N-by-N orthogonal matrix. The diagonal elements */ +/* > of SIGMA are the singular values of A. The columns of U and V are the */ +/* > left and the right singular vectors of A, respectively. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBA */ +/* > \verbatim */ +/* > JOBA is CHARACTER*1 */ +/* > Specifies the level of accuracy in the computed SVD */ +/* > = 'A' The requested accuracy corresponds to having the backward */ +/* > error bounded by || delta A ||_F <= f(m,n) * EPS * || A ||_F, */ +/* > where EPS = DLAMCH('Epsilon'). This authorises DGESVDQ to */ +/* > truncate the computed triangular factor in a rank revealing */ +/* > QR factorization whenever the truncated part is below the */ +/* > threshold of the order of EPS * ||A||_F. This is aggressive */ +/* > truncation level. */ +/* > = 'M' Similarly as with 'A', but the truncation is more gentle: it */ +/* > is allowed only when there is a drop on the diagonal of the */ +/* > triangular factor in the QR factorization. This is medium */ +/* > truncation level. */ +/* > = 'H' High accuracy requested. No numerical rank determination based */ +/* > on the rank revealing QR factorization is attempted. */ +/* > = 'E' Same as 'H', and in addition the condition number of column */ +/* > scaled A is estimated and returned in RWORK(1). */ +/* > N^(-1/4)*RWORK(1) <= ||pinv(A_scaled)||_2 <= N^(1/4)*RWORK(1) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBP */ +/* > \verbatim */ +/* > JOBP is CHARACTER*1 */ +/* > = 'P' The rows of A are ordered in decreasing order with respect to */ +/* > ||A(i,:)||_\infty. This enhances numerical accuracy at the cost */ +/* > of extra data movement. Recommended for numerical robustness. */ +/* > = 'N' No row pivoting. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBR */ +/* > \verbatim */ +/* > JOBR is CHARACTER*1 */ +/* > = 'T' After the initial pivoted QR factorization, DGESVD is applied to */ +/* > the transposed R**T of the computed triangular factor R. This involves */ +/* > some extra data movement (matrix transpositions). Useful for */ +/* > experiments, research and development. */ +/* > = 'N' The triangular factor R is given as input to DGESVD. This may be */ +/* > preferred as it involves less data movement. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > = 'A' All M left singular vectors are computed and returned in the */ +/* > matrix U. See the description of U. */ +/* > = 'S' or 'U' N = f2cmin(M,N) left singular vectors are computed and returned */ +/* > in the matrix U. See the description of U. */ +/* > = 'R' Numerical rank NUMRANK is determined and only NUMRANK left singular */ +/* > vectors are computed and returned in the matrix U. */ +/* > = 'F' The N left singular vectors are returned in factored form as the */ +/* > product of the Q factor from the initial QR factorization and the */ +/* > N left singular vectors of (R**T , 0)**T. If row pivoting is used, */ +/* > then the necessary information on the row pivoting is stored in */ +/* > IWORK(N+1:N+M-1). */ +/* > = 'N' The left singular vectors are not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > = 'A', 'V' All N right singular vectors are computed and returned in */ +/* > the matrix V. */ +/* > = 'R' Numerical rank NUMRANK is determined and only NUMRANK right singular */ +/* > vectors are computed and returned in the matrix V. This option is */ +/* > allowed only if JOBU = 'R' or JOBU = 'N'; otherwise it is illegal. */ +/* > = 'N' The right singular vectors are not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array of dimensions LDA x N */ +/* > On entry, the input matrix A. */ +/* > On exit, if JOBU .NE. 'N' or JOBV .NE. 'N', the lower triangle of A contains */ +/* > the Householder vectors as stored by DGEQP3. If JOBU = 'F', these Householder */ +/* > vectors together with WORK(1:N) can be used to restore the Q factors from */ +/* > the initial pivoted QR factorization of A. See the description of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER. */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array of dimension N. */ +/* > The singular values of A, ordered so that S(i) >= S(i+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is DOUBLE PRECISION array, dimension */ +/* > LDU x M if JOBU = 'A'; see the description of LDU. In this case, */ +/* > on exit, U contains the M left singular vectors. */ +/* > LDU x N if JOBU = 'S', 'U', 'R' ; see the description of LDU. In this */ +/* > case, U contains the leading N or the leading NUMRANK left singular vectors. */ +/* > LDU x N if JOBU = 'F' ; see the description of LDU. In this case U */ +/* > contains N x N orthogonal matrix that can be used to form the left */ +/* > singular vectors. */ +/* > If JOBU = 'N', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER. */ +/* > The leading dimension of the array U. */ +/* > If JOBU = 'A', 'S', 'U', 'R', LDU >= f2cmax(1,M). */ +/* > If JOBU = 'F', LDU >= f2cmax(1,N). */ +/* > Otherwise, LDU >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] V */ +/* > \verbatim */ +/* > V is DOUBLE PRECISION array, dimension */ +/* > LDV x N if JOBV = 'A', 'V', 'R' or if JOBA = 'E' . */ +/* > If JOBV = 'A', or 'V', V contains the N-by-N orthogonal matrix V**T; */ +/* > If JOBV = 'R', V contains the first NUMRANK rows of V**T (the right */ +/* > singular vectors, stored rowwise, of the NUMRANK largest singular values). */ +/* > If JOBV = 'N' and JOBA = 'E', V is used as a workspace. */ +/* > If JOBV = 'N', and JOBA.NE.'E', V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. */ +/* > If JOBV = 'A', 'V', 'R', or JOBA = 'E', LDV >= f2cmax(1,N). */ +/* > Otherwise, LDV >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] NUMRANK */ +/* > \verbatim */ +/* > NUMRANK is INTEGER */ +/* > NUMRANK is the numerical rank first determined after the rank */ +/* > revealing QR factorization, following the strategy specified by the */ +/* > value of JOBA. If JOBV = 'R' and JOBU = 'R', only NUMRANK */ +/* > leading singular values and vectors are then requested in the call */ +/* > of DGESVD. The final value of NUMRANK might be further reduced if */ +/* > some singular values are computed as zeros. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (f2cmax(1, LIWORK)). */ +/* > On exit, IWORK(1:N) contains column pivoting permutation of the */ +/* > rank revealing QR factorization. */ +/* > If JOBP = 'P', IWORK(N+1:N+M-1) contains the indices of the sequence */ +/* > of row swaps used in row pivoting. These can be used to restore the */ +/* > left singular vectors in the case JOBU = 'F'. */ +/* > */ +/* > If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0, */ +/* > LIWORK(1) returns the minimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > LIWORK >= N + M - 1, if JOBP = 'P' and JOBA .NE. 'E'; */ +/* > LIWORK >= N if JOBP = 'N' and JOBA .NE. 'E'; */ +/* > LIWORK >= N + M - 1 + N, if JOBP = 'P' and JOBA = 'E'; */ +/* > LIWORK >= N + N if JOBP = 'N' and JOBA = 'E'. */ + +/* > If LIWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates and returns the optimal and minimal sizes */ +/* > for the WORK, IWORK, and RWORK arrays, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (f2cmax(2, LWORK)), used as a workspace. */ +/* > On exit, if, on entry, LWORK.NE.-1, WORK(1:N) contains parameters */ +/* > needed to recover the Q factor from the QR factorization computed by */ +/* > DGEQP3. */ +/* > */ +/* > If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0, */ +/* > WORK(1) returns the optimal LWORK, and */ +/* > WORK(2) returns the minimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. It is determined as follows: */ +/* > Let LWQP3 = 3*N+1, LWCON = 3*N, and let */ +/* > LWORQ = { MAX( N, 1 ), if JOBU = 'R', 'S', or 'U' */ +/* > { MAX( M, 1 ), if JOBU = 'A' */ +/* > LWSVD = MAX( 5*N, 1 ) */ +/* > LWLQF = MAX( N/2, 1 ), LWSVD2 = MAX( 5*(N/2), 1 ), LWORLQ = MAX( N, 1 ), */ +/* > LWQRF = MAX( N/2, 1 ), LWORQ2 = MAX( N, 1 ) */ +/* > Then the minimal value of LWORK is: */ +/* > = MAX( N + LWQP3, LWSVD ) if only the singular values are needed; */ +/* > = MAX( N + LWQP3, LWCON, LWSVD ) if only the singular values are needed, */ +/* > and a scaled condition estimate requested; */ +/* > */ +/* > = N + MAX( LWQP3, LWSVD, LWORQ ) if the singular values and the left */ +/* > singular vectors are requested; */ +/* > = N + MAX( LWQP3, LWCON, LWSVD, LWORQ ) if the singular values and the left */ +/* > singular vectors are requested, and also */ +/* > a scaled condition estimate requested; */ +/* > */ +/* > = N + MAX( LWQP3, LWSVD ) if the singular values and the right */ +/* > singular vectors are requested; */ +/* > = N + MAX( LWQP3, LWCON, LWSVD ) if the singular values and the right */ +/* > singular vectors are requested, and also */ +/* > a scaled condition etimate requested; */ +/* > */ +/* > = N + MAX( LWQP3, LWSVD, LWORQ ) if the full SVD is requested with JOBV = 'R'; */ +/* > independent of JOBR; */ +/* > = N + MAX( LWQP3, LWCON, LWSVD, LWORQ ) if the full SVD is requested, */ +/* > JOBV = 'R' and, also a scaled condition */ +/* > estimate requested; independent of JOBR; */ +/* > = MAX( N + MAX( LWQP3, LWSVD, LWORQ ), */ +/* > N + MAX( LWQP3, N/2+LWLQF, N/2+LWSVD2, N/2+LWORLQ, LWORQ) ) if the */ +/* > full SVD is requested with JOBV = 'A' or 'V', and */ +/* > JOBR ='N' */ +/* > = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWORQ ), */ +/* > N + MAX( LWQP3, LWCON, N/2+LWLQF, N/2+LWSVD2, N/2+LWORLQ, LWORQ ) ) */ +/* > if the full SVD is requested with JOBV = 'A' or 'V', and */ +/* > JOBR ='N', and also a scaled condition number estimate */ +/* > requested. */ +/* > = MAX( N + MAX( LWQP3, LWSVD, LWORQ ), */ +/* > N + MAX( LWQP3, N/2+LWQRF, N/2+LWSVD2, N/2+LWORQ2, LWORQ ) ) if the */ +/* > full SVD is requested with JOBV = 'A', 'V', and JOBR ='T' */ +/* > = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWORQ ), */ +/* > N + MAX( LWQP3, LWCON, N/2+LWQRF, N/2+LWSVD2, N/2+LWORQ2, LWORQ ) ) */ +/* > if the full SVD is requested with JOBV = 'A' or 'V', and */ +/* > JOBR ='T', and also a scaled condition number estimate */ +/* > requested. */ +/* > Finally, LWORK must be at least two: LWORK = MAX( 2, LWORK ). */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates and returns the optimal and minimal sizes */ +/* > for the WORK, IWORK, and RWORK arrays, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (f2cmax(1, LRWORK)). */ +/* > On exit, */ +/* > 1. If JOBA = 'E', RWORK(1) contains an estimate of the condition */ +/* > number of column scaled A. If A = C * D where D is diagonal and C */ +/* > has unit columns in the Euclidean norm, then, assuming full column rank, */ +/* > N^(-1/4) * RWORK(1) <= ||pinv(C)||_2 <= N^(1/4) * RWORK(1). */ +/* > Otherwise, RWORK(1) = -1. */ +/* > 2. RWORK(2) contains the number of singular values computed as */ +/* > exact zeros in DGESVD applied to the upper triangular or trapeziodal */ +/* > R (from the initial QR factorization). In case of early exit (no call to */ +/* > DGESVD, such as in the case of zero matrix) RWORK(2) = -1. */ +/* > */ +/* > If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0, */ +/* > RWORK(1) returns the minimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER. */ +/* > The dimension of the array RWORK. */ +/* > If JOBP ='P', then LRWORK >= MAX(2, M). */ +/* > Otherwise, LRWORK >= 2 */ + +/* > If LRWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates and returns the optimal and minimal sizes */ +/* > for the WORK, IWORK, and RWORK arrays, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if DBDSQR did not converge, INFO specifies how many superdiagonals */ +/* > of an intermediate bidiagonal form B (computed in DGESVD) did not */ +/* > converge to zero. */ +/* > \endverbatim */ + +/* > \par Further Details: */ +/* ======================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > 1. The data movement (matrix transpose) is coded using simple nested */ +/* > DO-loops because BLAS and LAPACK do not provide corresponding subroutines. */ +/* > Those DO-loops are easily identified in this source code - by the CONTINUE */ +/* > statements labeled with 11**. In an optimized version of this code, the */ +/* > nested DO loops should be replaced with calls to an optimized subroutine. */ +/* > 2. This code scales A by 1/SQRT(M) if the largest ABS(A(i,j)) could cause */ +/* > column norm overflow. This is the minial precaution and it is left to the */ +/* > SVD routine (CGESVD) to do its own preemptive scaling if potential over- */ +/* > or underflows are detected. To avoid repeated scanning of the array A, */ +/* > an optimal implementation would do all necessary scaling before calling */ +/* > CGESVD and the scaling in CGESVD can be switched off. */ +/* > 3. Other comments related to code optimization are given in comments in the */ +/* > code, enlosed in [[double brackets]]. */ +/* > \endverbatim */ + +/* > \par Bugs, examples and comments */ +/* =========================== */ + +/* > \verbatim */ +/* > Please report all bugs and send interesting examples and/or comments to */ +/* > drmac@math.hr. Thank you. */ +/* > \endverbatim */ + +/* > \par References */ +/* =============== */ + +/* > \verbatim */ +/* > [1] Zlatko Drmac, Algorithm 977: A QR-Preconditioned QR SVD Method for */ +/* > Computing the SVD with High Accuracy. ACM Trans. Math. Softw. */ +/* > 44(1): 11:1-11:30 (2017) */ +/* > */ +/* > SIGMA library, xGESVDQ section updated February 2016. */ +/* > Developed and coded by Zlatko Drmac, Department of Mathematics */ +/* > University of Zagreb, Croatia, drmac@math.hr */ +/* > \endverbatim */ + + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > Developed and coded by Zlatko Drmac, Department of Mathematics */ +/* > University of Zagreb, Croatia, drmac@math.hr */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2018 */ + +/* > \ingroup doubleGEsing */ + +/* ===================================================================== */ +/* Subroutine */ int dgesvdq_(char *joba, char *jobp, char *jobr, char *jobu, + char *jobv, integer *m, integer *n, doublereal *a, integer *lda, + doublereal *s, doublereal *u, integer *ldu, doublereal *v, integer * + ldv, integer *numrank, integer *iwork, integer *liwork, doublereal * + work, integer *lwork, doublereal *rwork, integer *lrwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer lwrk_dormqr__, lwrk_dgesvd2__, ierr, lwrk_dormqr2__; + doublereal rtmp; + integer optratio; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + logical lsvc0, accla; + integer lwqp3; + logical acclh, acclm; + integer p, q; + logical conda; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + integer iwoff; + logical lsvec; + doublereal sfmin, epsln; + integer lwcon; + logical rsvec; + integer lwlqf, lwqrf, n1, lwsvd; + logical dntwu, dntwv, wntua; + integer lworq; + logical wntuf, wntva, wntur, wntus, wntvr; + extern /* Subroutine */ int dgeqp3_(integer *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, integer *, + integer *); + integer lwsvd2, lworq2; + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + integer nr; + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); + doublereal sconda; + extern /* Subroutine */ int dgeqrf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dgesvd_(char *, char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *), dlaset_(char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen), dlapmt_(logical *, + integer *, integer *, doublereal *, integer *, integer *), + dpocon_(char *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, integer *), + dlaswp_(integer *, doublereal *, integer *, integer *, integer *, + integer *, integer *), dormlq_(char *, char *, integer *, integer + *, integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), + dormqr_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + integer minwrk; + logical rtrans; + doublereal rdummy[1]; + integer lworlq; + logical lquery; + integer optwrk; + logical rowprm; + doublereal big; + integer minwrk2; + logical ascaled; + integer lwrk_dgeqp3__, optwrk2, lwrk_dgelqf__, iminwrk, lwrk_dgeqrf__, + rminwrk, lwrk_dgesvd__, lwrk_dormlq__; + + +/* ===================================================================== */ + + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + --iwork; + --work; + --rwork; + + /* Function Body */ + wntus = lsame_(jobu, "S") || lsame_(jobu, "U"); + wntur = lsame_(jobu, "R"); + wntua = lsame_(jobu, "A"); + wntuf = lsame_(jobu, "F"); + lsvc0 = wntus || wntur || wntua; + lsvec = lsvc0 || wntuf; + dntwu = lsame_(jobu, "N"); + + wntvr = lsame_(jobv, "R"); + wntva = lsame_(jobv, "A") || lsame_(jobv, "V"); + rsvec = wntvr || wntva; + dntwv = lsame_(jobv, "N"); + + accla = lsame_(joba, "A"); + acclm = lsame_(joba, "M"); + conda = lsame_(joba, "E"); + acclh = lsame_(joba, "H") || conda; + + rowprm = lsame_(jobp, "P"); + rtrans = lsame_(jobr, "T"); + + if (rowprm) { + if (conda) { +/* Computing MAX */ + i__1 = 1, i__2 = *n + *m - 1 + *n; + iminwrk = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = *n + *m - 1; + iminwrk = f2cmax(i__1,i__2); + } + rminwrk = f2cmax(2,*m); + } else { + if (conda) { +/* Computing MAX */ + i__1 = 1, i__2 = *n + *n; + iminwrk = f2cmax(i__1,i__2); + } else { + iminwrk = f2cmax(1,*n); + } + rminwrk = 2; + } + lquery = *liwork == -1 || *lwork == -1 || *lrwork == -1; + *info = 0; + if (! (accla || acclm || acclh)) { + *info = -1; + } else if (! (rowprm || lsame_(jobp, "N"))) { + *info = -2; + } else if (! (rtrans || lsame_(jobr, "N"))) { + *info = -3; + } else if (! (lsvec || dntwu)) { + *info = -4; + } else if (wntur && wntva) { + *info = -5; + } else if (! (rsvec || dntwv)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || *n > *m) { + *info = -7; + } else if (*lda < f2cmax(1,*m)) { + *info = -9; + } else if (*ldu < 1 || lsvc0 && *ldu < *m || wntuf && *ldu < *n) { + *info = -12; + } else if (*ldv < 1 || rsvec && *ldv < *n || conda && *ldv < *n) { + *info = -14; + } else if (*liwork < iminwrk && ! lquery) { + *info = -17; + } + + + if (*info == 0) { +/* [[The expressions for computing the minimal and the optimal */ +/* values of LWORK are written with a lot of redundancy and */ +/* can be simplified. However, this detailed form is easier for */ +/* maintenance and modifications of the code.]] */ + + lwqp3 = *n * 3 + 1; + if (wntus || wntur) { + lworq = f2cmax(*n,1); + } else if (wntua) { + lworq = f2cmax(*m,1); + } + lwcon = *n * 3; +/* Computing MAX */ + i__1 = *n * 5; + lwsvd = f2cmax(i__1,1); + if (lquery) { + dgeqp3_(m, n, &a[a_offset], lda, &iwork[1], rdummy, rdummy, &c_n1, + &ierr); + lwrk_dgeqp3__ = (integer) rdummy[0]; + if (wntus || wntur) { + dormqr_("L", "N", m, n, n, &a[a_offset], lda, rdummy, &u[ + u_offset], ldu, rdummy, &c_n1, &ierr); + lwrk_dormqr__ = (integer) rdummy[0]; + } else if (wntua) { + dormqr_("L", "N", m, m, n, &a[a_offset], lda, rdummy, &u[ + u_offset], ldu, rdummy, &c_n1, &ierr); + lwrk_dormqr__ = (integer) rdummy[0]; + } else { + lwrk_dormqr__ = 0; + } + } + minwrk = 2; + optwrk = 2; + if (! (lsvec || rsvec)) { +/* only the singular values are requested */ + if (conda) { +/* Computing MAX */ + i__1 = *n + lwqp3, i__1 = f2cmax(i__1,lwcon); + minwrk = f2cmax(i__1,lwsvd); + } else { +/* Computing MAX */ + i__1 = *n + lwqp3; + minwrk = f2cmax(i__1,lwsvd); + } + if (lquery) { + dgesvd_("N", "N", n, n, &a[a_offset], lda, &s[1], &u[u_offset] + , ldu, &v[v_offset], ldv, rdummy, &c_n1, &ierr); + lwrk_dgesvd__ = (integer) rdummy[0]; + if (conda) { +/* Computing MAX */ + i__1 = *n + lwrk_dgeqp3__, i__2 = *n + lwcon, i__1 = f2cmax( + i__1,i__2); + optwrk = f2cmax(i__1,lwrk_dgesvd__); + } else { +/* Computing MAX */ + i__1 = *n + lwrk_dgeqp3__; + optwrk = f2cmax(i__1,lwrk_dgesvd__); + } + } + } else if (lsvec && ! rsvec) { +/* singular values and the left singular vectors are requested */ + if (conda) { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwcon), i__1 = f2cmax(i__1,lwsvd); + minwrk = *n + f2cmax(i__1,lworq); + } else { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwsvd); + minwrk = *n + f2cmax(i__1,lworq); + } + if (lquery) { + if (rtrans) { + dgesvd_("N", "O", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, + &ierr); + } else { + dgesvd_("O", "N", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, + &ierr); + } + lwrk_dgesvd__ = (integer) rdummy[0]; + if (conda) { +/* Computing MAX */ + i__1 = f2cmax(lwrk_dgeqp3__,lwcon), i__1 = f2cmax(i__1, + lwrk_dgesvd__); + optwrk = *n + f2cmax(i__1,lwrk_dormqr__); + } else { +/* Computing MAX */ + i__1 = f2cmax(lwrk_dgeqp3__,lwrk_dgesvd__); + optwrk = *n + f2cmax(i__1,lwrk_dormqr__); + } + } + } else if (rsvec && ! lsvec) { +/* singular values and the right singular vectors are requested */ + if (conda) { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwcon); + minwrk = *n + f2cmax(i__1,lwsvd); + } else { + minwrk = *n + f2cmax(lwqp3,lwsvd); + } + if (lquery) { + if (rtrans) { + dgesvd_("O", "N", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, + &ierr); + } else { + dgesvd_("N", "O", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, + &ierr); + } + lwrk_dgesvd__ = (integer) rdummy[0]; + if (conda) { +/* Computing MAX */ + i__1 = f2cmax(lwrk_dgeqp3__,lwcon); + optwrk = *n + f2cmax(i__1,lwrk_dgesvd__); + } else { + optwrk = *n + f2cmax(lwrk_dgeqp3__,lwrk_dgesvd__); + } + } + } else { +/* full SVD is requested */ + if (rtrans) { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwsvd); + minwrk = f2cmax(i__1,lworq); + if (conda) { + minwrk = f2cmax(minwrk,lwcon); + } + minwrk += *n; + if (wntva) { +/* Computing MAX */ + i__1 = *n / 2; + lwqrf = f2cmax(i__1,1); +/* Computing MAX */ + i__1 = *n / 2 * 5; + lwsvd2 = f2cmax(i__1,1); + lworq2 = f2cmax(*n,1); +/* Computing MAX */ + i__1 = lwqp3, i__2 = *n / 2 + lwqrf, i__1 = f2cmax(i__1,i__2) + , i__2 = *n / 2 + lwsvd2, i__1 = f2cmax(i__1,i__2), + i__2 = *n / 2 + lworq2, i__1 = f2cmax(i__1,i__2); + minwrk2 = f2cmax(i__1,lworq); + if (conda) { + minwrk2 = f2cmax(minwrk2,lwcon); + } + minwrk2 = *n + minwrk2; + minwrk = f2cmax(minwrk,minwrk2); + } + } else { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwsvd); + minwrk = f2cmax(i__1,lworq); + if (conda) { + minwrk = f2cmax(minwrk,lwcon); + } + minwrk += *n; + if (wntva) { +/* Computing MAX */ + i__1 = *n / 2; + lwlqf = f2cmax(i__1,1); +/* Computing MAX */ + i__1 = *n / 2 * 5; + lwsvd2 = f2cmax(i__1,1); + lworlq = f2cmax(*n,1); +/* Computing MAX */ + i__1 = lwqp3, i__2 = *n / 2 + lwlqf, i__1 = f2cmax(i__1,i__2) + , i__2 = *n / 2 + lwsvd2, i__1 = f2cmax(i__1,i__2), + i__2 = *n / 2 + lworlq, i__1 = f2cmax(i__1,i__2); + minwrk2 = f2cmax(i__1,lworq); + if (conda) { + minwrk2 = f2cmax(minwrk2,lwcon); + } + minwrk2 = *n + minwrk2; + minwrk = f2cmax(minwrk,minwrk2); + } + } + if (lquery) { + if (rtrans) { + dgesvd_("O", "A", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, + &ierr); + lwrk_dgesvd__ = (integer) rdummy[0]; +/* Computing MAX */ + i__1 = f2cmax(lwrk_dgeqp3__,lwrk_dgesvd__); + optwrk = f2cmax(i__1,lwrk_dormqr__); + if (conda) { + optwrk = f2cmax(optwrk,lwcon); + } + optwrk = *n + optwrk; + if (wntva) { + i__1 = *n / 2; + dgeqrf_(n, &i__1, &u[u_offset], ldu, rdummy, rdummy, & + c_n1, &ierr); + lwrk_dgeqrf__ = (integer) rdummy[0]; + i__1 = *n / 2; + i__2 = *n / 2; + dgesvd_("S", "O", &i__1, &i__2, &v[v_offset], ldv, &s[ + 1], &u[u_offset], ldu, &v[v_offset], ldv, + rdummy, &c_n1, &ierr); + lwrk_dgesvd2__ = (integer) rdummy[0]; + i__1 = *n / 2; + dormqr_("R", "C", n, n, &i__1, &u[u_offset], ldu, + rdummy, &v[v_offset], ldv, rdummy, &c_n1, & + ierr); + lwrk_dormqr2__ = (integer) rdummy[0]; +/* Computing MAX */ + i__1 = lwrk_dgeqp3__, i__2 = *n / 2 + lwrk_dgeqrf__, + i__1 = f2cmax(i__1,i__2), i__2 = *n / 2 + + lwrk_dgesvd2__, i__1 = f2cmax(i__1,i__2), i__2 = + *n / 2 + lwrk_dormqr2__; + optwrk2 = f2cmax(i__1,i__2); + if (conda) { + optwrk2 = f2cmax(optwrk2,lwcon); + } + optwrk2 = *n + optwrk2; + optwrk = f2cmax(optwrk,optwrk2); + } + } else { + dgesvd_("S", "O", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, + &ierr); + lwrk_dgesvd__ = (integer) rdummy[0]; +/* Computing MAX */ + i__1 = f2cmax(lwrk_dgeqp3__,lwrk_dgesvd__); + optwrk = f2cmax(i__1,lwrk_dormqr__); + if (conda) { + optwrk = f2cmax(optwrk,lwcon); + } + optwrk = *n + optwrk; + if (wntva) { + i__1 = *n / 2; + dgelqf_(&i__1, n, &u[u_offset], ldu, rdummy, rdummy, & + c_n1, &ierr); + lwrk_dgelqf__ = (integer) rdummy[0]; + i__1 = *n / 2; + i__2 = *n / 2; + dgesvd_("S", "O", &i__1, &i__2, &v[v_offset], ldv, &s[ + 1], &u[u_offset], ldu, &v[v_offset], ldv, + rdummy, &c_n1, &ierr); + lwrk_dgesvd2__ = (integer) rdummy[0]; + i__1 = *n / 2; + dormlq_("R", "N", n, n, &i__1, &u[u_offset], ldu, + rdummy, &v[v_offset], ldv, rdummy, &c_n1, & + ierr); + lwrk_dormlq__ = (integer) rdummy[0]; +/* Computing MAX */ + i__1 = lwrk_dgeqp3__, i__2 = *n / 2 + lwrk_dgelqf__, + i__1 = f2cmax(i__1,i__2), i__2 = *n / 2 + + lwrk_dgesvd2__, i__1 = f2cmax(i__1,i__2), i__2 = + *n / 2 + lwrk_dormlq__; + optwrk2 = f2cmax(i__1,i__2); + if (conda) { + optwrk2 = f2cmax(optwrk2,lwcon); + } + optwrk2 = *n + optwrk2; + optwrk = f2cmax(optwrk,optwrk2); + } + } + } + } + + minwrk = f2cmax(2,minwrk); + optwrk = f2cmax(2,optwrk); + if (*lwork < minwrk && ! lquery) { + *info = -19; + } + + } + + if (*info == 0 && *lrwork < rminwrk && ! lquery) { + *info = -21; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGESVDQ", &i__1, (ftnlen)7); + return 0; + } else if (lquery) { + +/* Return optimal workspace */ + + iwork[1] = iminwrk; + work[1] = (doublereal) optwrk; + work[2] = (doublereal) minwrk; + rwork[1] = (doublereal) rminwrk; + return 0; + } + +/* Quick return if the matrix is void. */ + + if (*m == 0 || *n == 0) { + return 0; + } + + big = dlamch_("O"); + ascaled = FALSE_; + iwoff = 1; + if (rowprm) { + iwoff = *m; +/* ell-infinity norm - this enhances numerical robustness in */ +/* the case of differently scaled rows. */ + i__1 = *m; + for (p = 1; p <= i__1; ++p) { +/* RWORK(p) = ABS( A(p,ICAMAX(N,A(p,1),LDA)) ) */ +/* [[DLANGE will return NaN if an entry of the p-th row is Nan]] */ + rwork[p] = dlange_("M", &c__1, n, &a[p + a_dim1], lda, rdummy); + if (rwork[p] != rwork[p] || rwork[p] * 0. != 0.) { + *info = -8; + i__2 = -(*info); + xerbla_("DGESVDQ", &i__2, (ftnlen)7); + return 0; + } +/* L1904: */ + } + i__1 = *m - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *m - p + 1; + q = idamax_(&i__2, &rwork[p], &c__1) + p - 1; + iwork[*n + p] = q; + if (p != q) { + rtmp = rwork[p]; + rwork[p] = rwork[q]; + rwork[q] = rtmp; + } +/* L1952: */ + } + + if (rwork[1] == 0.) { +/* Quick return: A is the M x N zero matrix. */ + *numrank = 0; + dlaset_("G", n, &c__1, &c_b72, &c_b72, &s[1], n); + if (wntus) { + dlaset_("G", m, n, &c_b72, &c_b76, &u[u_offset], ldu); + } + if (wntua) { + dlaset_("G", m, m, &c_b72, &c_b76, &u[u_offset], ldu); + } + if (wntva) { + dlaset_("G", n, n, &c_b72, &c_b76, &v[v_offset], ldv); + } + if (wntuf) { + dlaset_("G", n, &c__1, &c_b72, &c_b72, &work[1], n) + ; + dlaset_("G", m, n, &c_b72, &c_b76, &u[u_offset], ldu); + } + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + iwork[p] = p; +/* L5001: */ + } + if (rowprm) { + i__1 = *n + *m - 1; + for (p = *n + 1; p <= i__1; ++p) { + iwork[p] = p - *n; +/* L5002: */ + } + } + if (conda) { + rwork[1] = -1.; + } + rwork[2] = -1.; + return 0; + } + + if (rwork[1] > big / sqrt((doublereal) (*m))) { +/* matrix by 1/sqrt(M) if too large entry detected */ + d__1 = sqrt((doublereal) (*m)); + dlascl_("G", &c__0, &c__0, &d__1, &c_b76, m, n, &a[a_offset], lda, + &ierr); + ascaled = TRUE_; + } + i__1 = *m - 1; + dlaswp_(n, &a[a_offset], lda, &c__1, &i__1, &iwork[*n + 1], &c__1); + } + +/* norms overflows during the QR factorization. The SVD procedure should */ +/* have its own scaling to save the singular values from overflows and */ +/* underflows. That depends on the SVD procedure. */ + + if (! rowprm) { + rtmp = dlange_("M", m, n, &a[a_offset], lda, rdummy); + if (rtmp != rtmp || rtmp * 0. != 0.) { + *info = -8; + i__1 = -(*info); + xerbla_("DGESVDQ", &i__1, (ftnlen)7); + return 0; + } + if (rtmp > big / sqrt((doublereal) (*m))) { +/* matrix by 1/sqrt(M) if too large entry detected */ + d__1 = sqrt((doublereal) (*m)); + dlascl_("G", &c__0, &c__0, &d__1, &c_b76, m, n, &a[a_offset], lda, + &ierr); + ascaled = TRUE_; + } + } + + +/* A * P = Q * [ R ] */ +/* [ 0 ] */ + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + iwork[p] = 0; +/* L1963: */ + } + i__1 = *lwork - *n; + dgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &work[1], &work[*n + 1], & + i__1, &ierr); + +/* If the user requested accuracy level allows truncation in the */ +/* computed upper triangular factor, the matrix R is examined and, */ +/* if possible, replaced with its leading upper trapezoidal part. */ + + epsln = dlamch_("E"); + sfmin = dlamch_("S"); +/* SMALL = SFMIN / EPSLN */ + nr = *n; + + if (accla) { + +/* Standard absolute error bound suffices. All sigma_i with */ +/* sigma_i < N*EPS*||A||_F are flushed to zero. This is an */ +/* aggressive enforcement of lower numerical rank by introducing a */ +/* backward error of the order of N*EPS*||A||_F. */ + nr = 1; + rtmp = sqrt((doublereal) (*n)) * epsln; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((d__2 = a[p + p * a_dim1], abs(d__2)) < rtmp * (d__1 = a[ + a_dim1 + 1], abs(d__1))) { + goto L3002; + } + ++nr; +/* L3001: */ + } +L3002: + + ; + } else if (acclm) { +/* Sudden drop on the diagonal of R is used as the criterion for being */ +/* close-to-rank-deficient. The threshold is set to EPSLN=DLAMCH('E'). */ +/* [[This can be made more flexible by replacing this hard-coded value */ +/* with a user specified threshold.]] Also, the values that underflow */ +/* will be truncated. */ + nr = 1; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((d__2 = a[p + p * a_dim1], abs(d__2)) < epsln * (d__1 = a[p - + 1 + (p - 1) * a_dim1], abs(d__1)) || (d__3 = a[p + p * + a_dim1], abs(d__3)) < sfmin) { + goto L3402; + } + ++nr; +/* L3401: */ + } +L3402: + + ; + } else { +/* obvious case of zero pivots. */ +/* R(i,i)=0 => R(i:N,i:N)=0. */ + nr = 1; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((d__1 = a[p + p * a_dim1], abs(d__1)) == 0.) { + goto L3502; + } + ++nr; +/* L3501: */ + } +L3502: + + if (conda) { +/* Estimate the scaled condition number of A. Use the fact that it is */ +/* the same as the scaled condition number of R. */ + dlacpy_("U", n, n, &a[a_offset], lda, &v[v_offset], ldv); +/* Only the leading NR x NR submatrix of the triangular factor */ +/* is considered. Only if NR=N will this give a reliable error */ +/* bound. However, even for NR < N, this can be used on an */ +/* expert level and obtain useful information in the sense of */ +/* perturbation theory. */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + rtmp = dnrm2_(&p, &v[p * v_dim1 + 1], &c__1); + d__1 = 1. / rtmp; + dscal_(&p, &d__1, &v[p * v_dim1 + 1], &c__1); +/* L3053: */ + } + if (! (lsvec || rsvec)) { + dpocon_("U", &nr, &v[v_offset], ldv, &c_b76, &rtmp, &work[1], + &iwork[*n + iwoff], &ierr); + } else { + dpocon_("U", &nr, &v[v_offset], ldv, &c_b76, &rtmp, &work[*n + + 1], &iwork[*n + iwoff], &ierr); + } + sconda = 1. / sqrt(rtmp); +/* For NR=N, SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1), */ +/* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */ +/* See the reference [1] for more details. */ + } + + } + + if (wntur) { + n1 = nr; + } else if (wntus || wntuf) { + n1 = *n; + } else if (wntua) { + n1 = *m; + } + + if (! (rsvec || lsvec)) { +/* ....................................................................... */ +/* ....................................................................... */ + if (rtrans) { + +/* the upper triangle of [A] to zero. */ + i__1 = f2cmin(*n,nr); + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + a[q + p * a_dim1] = a[p + q * a_dim1]; + if (q <= nr) { + a[p + q * a_dim1] = 0.; + } +/* L1147: */ + } +/* L1146: */ + } + + dgesvd_("N", "N", n, &nr, &a[a_offset], lda, &s[1], &u[u_offset], + ldu, &v[v_offset], ldv, &work[1], lwork, info); + + } else { + + + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("L", &i__1, &i__2, &c_b72, &c_b72, &a[a_dim1 + 2], + lda); + } + dgesvd_("N", "N", &nr, n, &a[a_offset], lda, &s[1], &u[u_offset], + ldu, &v[v_offset], ldv, &work[1], lwork, info); + + } + + } else if (lsvec && ! rsvec) { +/* ....................................................................... */ +/* ......................................................................."""""""" */ + if (rtrans) { +/* vectors of R */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + u[q + p * u_dim1] = a[p + q * a_dim1]; +/* L1193: */ + } +/* L1192: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("U", &i__1, &i__2, &c_b72, &c_b72, &u[(u_dim1 << 1) + + 1], ldu); + } +/* vectors overwrite [U](1:NR,1:NR) as transposed. These */ +/* will be pre-multiplied by Q to build the left singular vectors of A. */ + i__1 = *lwork - *n; + dgesvd_("N", "O", n, &nr, &u[u_offset], ldu, &s[1], &u[u_offset], + ldu, &u[u_offset], ldu, &work[*n + 1], &i__1, info); + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr; + for (q = p + 1; q <= i__2; ++q) { + rtmp = u[q + p * u_dim1]; + u[q + p * u_dim1] = u[p + q * u_dim1]; + u[p + q * u_dim1] = rtmp; +/* L1120: */ + } +/* L1119: */ + } + + } else { + dlacpy_("U", &nr, n, &a[a_offset], lda, &u[u_offset], ldu); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("L", &i__1, &i__2, &c_b72, &c_b72, &u[u_dim1 + 2], + ldu); + } +/* vectors overwrite [U](1:NR,1:NR) */ + i__1 = *lwork - *n; + dgesvd_("O", "N", &nr, n, &u[u_offset], ldu, &s[1], &u[u_offset], + ldu, &v[v_offset], ldv, &work[*n + 1], &i__1, info); +/* R. These will be pre-multiplied by Q to build the left singular */ +/* vectors of A. */ + } + +/* (M x NR) or (M x N) or (M x M). */ + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + dlaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + dlaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 1) * u_dim1 + + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + 1 + (nr + + 1) * u_dim1], ldu); + } + } + +/* The Q matrix from the first QRF is built into the left singular */ +/* vectors matrix U. */ + + if (! wntuf) { + i__1 = *lwork - *n; + dormqr_("L", "N", m, &n1, n, &a[a_offset], lda, &work[1], &u[ + u_offset], ldu, &work[*n + 1], &i__1, &ierr); + } + if (rowprm && ! wntuf) { + i__1 = *m - 1; + dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[*n + 1], & + c_n1); + } + + } else if (rsvec && ! lsvec) { +/* ....................................................................... */ +/* ....................................................................... */ + if (rtrans) { + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + v[q + p * v_dim1] = a[p + q * a_dim1]; +/* L1166: */ + } +/* L1165: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 << 1) + + 1], ldv); + } +/* vectors not computed */ + if (wntvr || nr == *n) { + i__1 = *lwork - *n; + dgesvd_("O", "N", n, &nr, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &u[u_offset], ldu, &work[*n + 1], & + i__1, info); + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr; + for (q = p + 1; q <= i__2; ++q) { + rtmp = v[q + p * v_dim1]; + v[q + p * v_dim1] = v[p + q * v_dim1]; + v[p + q * v_dim1] = rtmp; +/* L1122: */ + } +/* L1121: */ + } + + if (nr < *n) { + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = nr + 1; q <= i__2; ++q) { + v[p + q * v_dim1] = v[q + p * v_dim1]; +/* L1104: */ + } +/* L1103: */ + } + } + dlapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]); + } else { +/* [!] This is simple implementation that augments [V](1:N,1:NR) */ +/* by padding a zero block. In the case NR << N, a more efficient */ +/* way is to first use the QR factorization. For more details */ +/* how to implement this, see the " FULL SVD " branch. */ + i__1 = *n - nr; + dlaset_("G", n, &i__1, &c_b72, &c_b72, &v[(nr + 1) * v_dim1 + + 1], ldv); + i__1 = *lwork - *n; + dgesvd_("O", "N", n, n, &v[v_offset], ldv, &s[1], &u[u_offset] + , ldu, &u[u_offset], ldu, &work[*n + 1], &i__1, info); + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + rtmp = v[q + p * v_dim1]; + v[q + p * v_dim1] = v[p + q * v_dim1]; + v[p + q * v_dim1] = rtmp; +/* L1124: */ + } +/* L1123: */ + } + dlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); + } + + } else { + dlacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("L", &i__1, &i__2, &c_b72, &c_b72, &v[v_dim1 + 2], + ldv); + } +/* vectors stored in U(1:NR,1:NR) */ + if (wntvr || nr == *n) { + i__1 = *lwork - *n; + dgesvd_("N", "O", &nr, n, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &work[*n + 1], & + i__1, info); + dlapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]); + } else { +/* [!] This is simple implementation that augments [V](1:NR,1:N) */ +/* by padding a zero block. In the case NR << N, a more efficient */ +/* way is to first use the LQ factorization. For more details */ +/* how to implement this, see the " FULL SVD " branch. */ + i__1 = *n - nr; + dlaset_("G", &i__1, n, &c_b72, &c_b72, &v[nr + 1 + v_dim1], + ldv); + i__1 = *lwork - *n; + dgesvd_("N", "O", n, n, &v[v_offset], ldv, &s[1], &u[u_offset] + , ldu, &v[v_offset], ldv, &work[*n + 1], &i__1, info); + dlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); + } +/* vectors of A. */ + } + + } else { +/* ....................................................................... */ +/* ....................................................................... */ + if (rtrans) { + + + if (wntvr || nr == *n) { +/* vectors of R**T */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + v[q + p * v_dim1] = a[p + q * a_dim1]; +/* L1169: */ + } +/* L1168: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 << + 1) + 1], ldv); + } + +/* singular vectors of R**T stored in [U](1:NR,1:NR) as transposed */ + i__1 = *lwork - *n; + dgesvd_("O", "A", n, &nr, &v[v_offset], ldv, &s[1], &v[ + v_offset], ldv, &u[u_offset], ldu, &work[*n + 1], & + i__1, info); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr; + for (q = p + 1; q <= i__2; ++q) { + rtmp = v[q + p * v_dim1]; + v[q + p * v_dim1] = v[p + q * v_dim1]; + v[p + q * v_dim1] = rtmp; +/* L1116: */ + } +/* L1115: */ + } + if (nr < *n) { + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = nr + 1; q <= i__2; ++q) { + v[p + q * v_dim1] = v[q + p * v_dim1]; +/* L1102: */ + } +/* L1101: */ + } + } + dlapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]); + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr; + for (q = p + 1; q <= i__2; ++q) { + rtmp = u[q + p * u_dim1]; + u[q + p * u_dim1] = u[p + q * u_dim1]; + u[p + q * u_dim1] = rtmp; +/* L1118: */ + } +/* L1117: */ + } + + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + dlaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + dlaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 1) * + u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + 1 + + (nr + 1) * u_dim1], ldu); + } + } + + } else { +/* vectors of R**T */ +/* [[The optimal ratio N/NR for using QRF instead of padding */ +/* with zeros. Here hard coded to 2; it must be at least */ +/* two due to work space constraints.]] */ +/* OPTRATIO = ILAENV(6, 'DGESVD', 'S' // 'O', NR,N,0,0) */ +/* OPTRATIO = MAX( OPTRATIO, 2 ) */ + optratio = 2; + if (optratio * nr > *n) { + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + v[q + p * v_dim1] = a[p + q * a_dim1]; +/* L1199: */ + } +/* L1198: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 + << 1) + 1], ldv); + } + + i__1 = *n - nr; + dlaset_("A", n, &i__1, &c_b72, &c_b72, &v[(nr + 1) * + v_dim1 + 1], ldv); + i__1 = *lwork - *n; + dgesvd_("O", "A", n, n, &v[v_offset], ldv, &s[1], &v[ + v_offset], ldv, &u[u_offset], ldu, &work[*n + 1], + &i__1, info); + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + rtmp = v[q + p * v_dim1]; + v[q + p * v_dim1] = v[p + q * v_dim1]; + v[p + q * v_dim1] = rtmp; +/* L1114: */ + } +/* L1113: */ + } + dlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); +/* (M x N1), i.e. (M x N) or (M x M). */ + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + rtmp = u[q + p * u_dim1]; + u[q + p * u_dim1] = u[p + q * u_dim1]; + u[p + q * u_dim1] = rtmp; +/* L1112: */ + } +/* L1111: */ + } + + if (*n < *m && ! wntuf) { + i__1 = *m - *n; + dlaset_("A", &i__1, n, &c_b72, &c_b72, &u[*n + 1 + + u_dim1], ldu); + if (*n < n1) { + i__1 = n1 - *n; + dlaset_("A", n, &i__1, &c_b72, &c_b72, &u[(*n + 1) + * u_dim1 + 1], ldu); + i__1 = *m - *n; + i__2 = n1 - *n; + dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[*n + + 1 + (*n + 1) * u_dim1], ldu); + } + } + } else { +/* singular vectors of R */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + u[q + (nr + p) * u_dim1] = a[p + q * a_dim1]; +/* L1197: */ + } +/* L1196: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("U", &i__1, &i__2, &c_b72, &c_b72, &u[(nr + 2) + * u_dim1 + 1], ldu); + } + i__1 = *lwork - *n - nr; + dgeqrf_(n, &nr, &u[(nr + 1) * u_dim1 + 1], ldu, &work[*n + + 1], &work[*n + nr + 1], &i__1, &ierr); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = 1; q <= i__2; ++q) { + v[q + p * v_dim1] = u[p + (nr + q) * u_dim1]; +/* L1144: */ + } +/* L1143: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 << + 1) + 1], ldv); + i__1 = *lwork - *n - nr; + dgesvd_("S", "O", &nr, &nr, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &work[*n + nr + + 1], &i__1, info); + i__1 = *n - nr; + dlaset_("A", &i__1, &nr, &c_b72, &c_b72, &v[nr + 1 + + v_dim1], ldv); + i__1 = *n - nr; + dlaset_("A", &nr, &i__1, &c_b72, &c_b72, &v[(nr + 1) * + v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &v[nr + 1 + ( + nr + 1) * v_dim1], ldv); + i__1 = *lwork - *n - nr; + dormqr_("R", "C", n, n, &nr, &u[(nr + 1) * u_dim1 + 1], + ldu, &work[*n + 1], &v[v_offset], ldv, &work[*n + + nr + 1], &i__1, &ierr); + dlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); +/* (M x NR) or (M x N) or (M x M). */ + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + dlaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + dlaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + + 1) * u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + + 1 + (nr + 1) * u_dim1], ldu); + } + } + } + } + + } else { + + + if (wntvr || nr == *n) { + dlacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("L", &i__1, &i__2, &c_b72, &c_b72, &v[v_dim1 + 2], + ldv); + } +/* singular vectors of R stored in [U](1:NR,1:NR) */ + i__1 = *lwork - *n; + dgesvd_("S", "O", &nr, n, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &work[*n + 1], & + i__1, info); + dlapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]); +/* (M x NR) or (M x N) or (M x M). */ + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + dlaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + dlaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 1) * + u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + 1 + + (nr + 1) * u_dim1], ldu); + } + } + + } else { +/* is then N1 (N or M) */ +/* [[The optimal ratio N/NR for using LQ instead of padding */ +/* with zeros. Here hard coded to 2; it must be at least */ +/* two due to work space constraints.]] */ +/* OPTRATIO = ILAENV(6, 'DGESVD', 'S' // 'O', NR,N,0,0) */ +/* OPTRATIO = MAX( OPTRATIO, 2 ) */ + optratio = 2; + if (optratio * nr > *n) { + dlacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("L", &i__1, &i__2, &c_b72, &c_b72, &v[v_dim1 + + 2], ldv); + } +/* singular vectors of R stored in [U](1:NR,1:NR) */ + i__1 = *n - nr; + dlaset_("A", &i__1, n, &c_b72, &c_b72, &v[nr + 1 + v_dim1] + , ldv); + i__1 = *lwork - *n; + dgesvd_("S", "O", n, n, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &work[*n + 1], + &i__1, info); + dlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); +/* singular vectors of A. The leading N left singular vectors */ +/* are in [U](1:N,1:N) */ +/* (M x N1), i.e. (M x N) or (M x M). */ + if (*n < *m && ! wntuf) { + i__1 = *m - *n; + dlaset_("A", &i__1, n, &c_b72, &c_b72, &u[*n + 1 + + u_dim1], ldu); + if (*n < n1) { + i__1 = n1 - *n; + dlaset_("A", n, &i__1, &c_b72, &c_b72, &u[(*n + 1) + * u_dim1 + 1], ldu); + i__1 = *m - *n; + i__2 = n1 - *n; + dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[*n + + 1 + (*n + 1) * u_dim1], ldu); + } + } + } else { + dlacpy_("U", &nr, n, &a[a_offset], lda, &u[nr + 1 + + u_dim1], ldu); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("L", &i__1, &i__2, &c_b72, &c_b72, &u[nr + 2 + + u_dim1], ldu); + } + i__1 = *lwork - *n - nr; + dgelqf_(&nr, n, &u[nr + 1 + u_dim1], ldu, &work[*n + 1], & + work[*n + nr + 1], &i__1, &ierr); + dlacpy_("L", &nr, &nr, &u[nr + 1 + u_dim1], ldu, &v[ + v_offset], ldv); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + dlaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 + << 1) + 1], ldv); + } + i__1 = *lwork - *n - nr; + dgesvd_("S", "O", &nr, &nr, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &work[*n + nr + + 1], &i__1, info); + i__1 = *n - nr; + dlaset_("A", &i__1, &nr, &c_b72, &c_b72, &v[nr + 1 + + v_dim1], ldv); + i__1 = *n - nr; + dlaset_("A", &nr, &i__1, &c_b72, &c_b72, &v[(nr + 1) * + v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &v[nr + 1 + ( + nr + 1) * v_dim1], ldv); + i__1 = *lwork - *n - nr; + dormlq_("R", "N", n, n, &nr, &u[nr + 1 + u_dim1], ldu, & + work[*n + 1], &v[v_offset], ldv, &work[*n + nr + + 1], &i__1, &ierr); + dlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); +/* (M x NR) or (M x N) or (M x M). */ + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + dlaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + dlaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + + 1) * u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + + 1 + (nr + 1) * u_dim1], ldu); + } + } + } + } + } + +/* The Q matrix from the first QRF is built into the left singular */ +/* vectors matrix U. */ + + if (! wntuf) { + i__1 = *lwork - *n; + dormqr_("L", "N", m, &n1, n, &a[a_offset], lda, &work[1], &u[ + u_offset], ldu, &work[*n + 1], &i__1, &ierr); + } + if (rowprm && ! wntuf) { + i__1 = *m - 1; + dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[*n + 1], & + c_n1); + } + +/* ... end of the "full SVD" branch */ + } + +/* Check whether some singular values are returned as zeros, e.g. */ +/* due to underflow, and update the numerical rank. */ + p = nr; + for (q = p; q >= 1; --q) { + if (s[q] > 0.) { + goto L4002; + } + --nr; +/* L4001: */ + } +L4002: + +/* singular values are set to zero. */ + if (nr < *n) { + i__1 = *n - nr; + dlaset_("G", &i__1, &c__1, &c_b72, &c_b72, &s[nr + 1], n); + } +/* values. */ + if (ascaled) { + d__1 = sqrt((doublereal) (*m)); + dlascl_("G", &c__0, &c__0, &c_b76, &d__1, &nr, &c__1, &s[1], n, &ierr); + } + if (conda) { + rwork[1] = sconda; + } + rwork[2] = (doublereal) (p - nr); +/* exact zeros in DGESVD() applied to the (possibly truncated) */ +/* full row rank triangular (trapezoidal) factor of A. */ + *numrank = nr; + + return 0; + +/* End of DGESVDQ */ + +} /* dgesvdq_ */ + diff --git a/lapack-netlib/SRC/dgesvdx.c b/lapack-netlib/SRC/dgesvdx.c new file mode 100644 index 000000000..b379aa59f --- /dev/null +++ b/lapack-netlib/SRC/dgesvdx.c @@ -0,0 +1,1342 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGESVDX computes the singular value decomposition (SVD) for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGESVDX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGESVDX( JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, */ +/* $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, */ +/* $ LWORK, IWORK, INFO ) */ + + +/* CHARACTER JOBU, JOBVT, RANGE */ +/* INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS */ +/* DOUBLE PRECISION VL, VU */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), */ +/* $ VT( LDVT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGESVDX computes the singular value decomposition (SVD) of a real */ +/* > M-by-N matrix A, optionally computing the left and/or right singular */ +/* > vectors. The SVD is written */ +/* > */ +/* > A = U * SIGMA * transpose(V) */ +/* > */ +/* > where SIGMA is an M-by-N matrix which is zero except for its */ +/* > f2cmin(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */ +/* > V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA */ +/* > are the singular values of A; they are real and non-negative, and */ +/* > are returned in descending order. The first f2cmin(m,n) columns of */ +/* > U and V are the left and right singular vectors of A. */ +/* > */ +/* > DGESVDX uses an eigenvalue problem for obtaining the SVD, which */ +/* > allows for the computation of a subset of singular values and */ +/* > vectors. See DBDSVDX for details. */ +/* > */ +/* > Note that the routine returns V**T, not V. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix U: */ +/* > = 'V': the first f2cmin(m,n) columns of U (the left singular */ +/* > vectors) or as specified by RANGE are returned in */ +/* > the array U; */ +/* > = 'N': no columns of U (no left singular vectors) are */ +/* > computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVT */ +/* > \verbatim */ +/* > JOBVT is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix */ +/* > V**T: */ +/* > = 'V': the first f2cmin(m,n) rows of V**T (the right singular */ +/* > vectors) or as specified by RANGE are returned in */ +/* > the array VT; */ +/* > = 'N': no rows of V**T (no right singular vectors) are */ +/* > computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all singular values will be found. */ +/* > = 'V': all singular values in the half-open interval (VL,VU] */ +/* > will be found. */ +/* > = 'I': the IL-th through IU-th singular values will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the contents of A are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for singular values. VU > VL. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is DOUBLE PRECISION */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for singular values. VU > VL. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest singular value to be returned. */ +/* > 1 <= IL <= IU <= f2cmin(M,N), if f2cmin(M,N) > 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest singular value to be returned. */ +/* > 1 <= IL <= IU <= f2cmin(M,N), if f2cmin(M,N) > 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] NS */ +/* > \verbatim */ +/* > NS is INTEGER */ +/* > The total number of singular values found, */ +/* > 0 <= NS <= f2cmin(M,N). */ +/* > If RANGE = 'A', NS = f2cmin(M,N); if RANGE = 'I', NS = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The singular values of A, sorted so that S(i) >= S(i+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is DOUBLE PRECISION array, dimension (LDU,UCOL) */ +/* > If JOBU = 'V', U contains columns of U (the left singular */ +/* > vectors, stored columnwise) as specified by RANGE; if */ +/* > JOBU = 'N', U is not referenced. */ +/* > Note: The user must ensure that UCOL >= NS; if RANGE = 'V', */ +/* > the exact value of NS is not known in advance and an upper */ +/* > bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= 1; if */ +/* > JOBU = 'V', LDU >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VT */ +/* > \verbatim */ +/* > VT is DOUBLE PRECISION array, dimension (LDVT,N) */ +/* > If JOBVT = 'V', VT contains the rows of V**T (the right singular */ +/* > vectors, stored rowwise) as specified by RANGE; if JOBVT = 'N', */ +/* > VT is not referenced. */ +/* > Note: The user must ensure that LDVT >= NS; if RANGE = 'V', */ +/* > the exact value of NS is not known in advance and an upper */ +/* > bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER */ +/* > The leading dimension of the array VT. LDVT >= 1; if */ +/* > JOBVT = 'V', LDVT >= NS (see above). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= MAX(1,MIN(M,N)*(MIN(M,N)+4)) for the paths (see */ +/* > comments inside the code): */ +/* > - PATH 1 (M much larger than N) */ +/* > - PATH 1t (N much larger than M) */ +/* > LWORK >= MAX(1,MIN(M,N)*2+MAX(M,N)) for the other paths. */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (12*MIN(M,N)) */ +/* > If INFO = 0, the first NS elements of IWORK are zero. If INFO > 0, */ +/* > then IWORK contains the indices of the eigenvectors that failed */ +/* > to converge in DBDSVDX/DSTEVX. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, then i eigenvectors failed to converge */ +/* > in DBDSVDX/DSTEVX. */ +/* > if INFO = N*2 + 1, an internal error occurred in */ +/* > DBDSVDX */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup doubleGEsing */ + +/* ===================================================================== */ +/* Subroutine */ int dgesvdx_(char *jobu, char *jobvt, char *range, integer * + m, integer *n, doublereal *a, integer *lda, doublereal *vl, + doublereal *vu, integer *il, integer *iu, integer *ns, doublereal *s, + doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, + doublereal *work, integer *lwork, integer *iwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], + i__2, i__3; + char ch__1[2]; + + /* Local variables */ + integer iscl; + logical alls, inds; + integer ilqf; + doublereal anrm; + integer ierr, iqrf, itau; + char jobz[1]; + logical vals; + integer i__, j; + extern logical lsame_(char *, char *); + integer iltgk, itemp, minmn; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer itaup, itauq, iutgk, itgkz, mnthr; + logical wantu; + integer id, ie; + extern /* Subroutine */ int dgebrd_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), dlacpy_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + doublereal bignum, abstol; + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + char rngtgk[1]; + extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), + dormqr_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + integer minwrk, maxwrk; + doublereal smlnum; + logical lquery, wantvt; + doublereal dum[1], eps; + extern /* Subroutine */ int dbdsvdx_(char *, char *, char *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1 * 1; + vt -= vt_offset; + --work; + --iwork; + + /* Function Body */ + *ns = 0; + *info = 0; + abstol = dlamch_("S") * 2; + lquery = *lwork == -1; + minmn = f2cmin(*m,*n); + wantu = lsame_(jobu, "V"); + wantvt = lsame_(jobvt, "V"); + if (wantu || wantvt) { + *(unsigned char *)jobz = 'V'; + } else { + *(unsigned char *)jobz = 'N'; + } + alls = lsame_(range, "A"); + vals = lsame_(range, "V"); + inds = lsame_(range, "I"); + + *info = 0; + if (! lsame_(jobu, "V") && ! lsame_(jobu, "N")) { + *info = -1; + } else if (! lsame_(jobvt, "V") && ! lsame_(jobvt, + "N")) { + *info = -2; + } else if (! (alls || vals || inds)) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*m > *lda) { + *info = -7; + } else if (minmn > 0) { + if (vals) { + if (*vl < 0.) { + *info = -8; + } else if (*vu <= *vl) { + *info = -9; + } + } else if (inds) { + if (*il < 1 || *il > f2cmax(1,minmn)) { + *info = -10; + } else if (*iu < f2cmin(minmn,*il) || *iu > minmn) { + *info = -11; + } + } + if (*info == 0) { + if (wantu && *ldu < *m) { + *info = -15; + } else if (wantvt) { + if (inds) { + if (*ldvt < *iu - *il + 1) { + *info = -17; + } + } else if (*ldvt < minmn) { + *info = -17; + } + } + } + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (minmn > 0) { + if (*m >= *n) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = ilaenv_(&c__6, "DGESVD", ch__1, m, n, &c__0, &c__0, ( + ftnlen)6, (ftnlen)2); + if (*m >= mnthr) { + +/* Path 1 (M much larger than N) */ + + maxwrk = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * (*n + 5) + (*n << 1) * ilaenv_( + &c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + if (wantu) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * (*n * 3 + 6) + *n * + ilaenv_(&c__1, "DORMQR", " ", n, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } + if (wantvt) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * (*n * 3 + 6) + *n * + ilaenv_(&c__1, "DORMLQ", " ", n, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } + minwrk = *n * (*n * 3 + 20); + } else { + +/* Path 2 (M at least N, but not much larger) */ + + maxwrk = (*n << 2) + (*m + *n) * ilaenv_(&c__1, "DGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wantu) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * ((*n << 1) + 5) + *n * + ilaenv_(&c__1, "DORMQR", " ", n, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } + if (wantvt) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * ((*n << 1) + 5) + *n * + ilaenv_(&c__1, "DORMLQ", " ", n, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } +/* Computing MAX */ + i__2 = *n * ((*n << 1) + 19), i__3 = (*n << 2) + *m; + minwrk = f2cmax(i__2,i__3); + } + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = ilaenv_(&c__6, "DGESVD", ch__1, m, n, &c__0, &c__0, ( + ftnlen)6, (ftnlen)2); + if (*n >= mnthr) { + +/* Path 1t (N much larger than M) */ + + maxwrk = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * (*m + 5) + (*m << 1) * ilaenv_( + &c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + if (wantu) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * (*m * 3 + 6) + *m * + ilaenv_(&c__1, "DORMQR", " ", m, m, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } + if (wantvt) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * (*m * 3 + 6) + *m * + ilaenv_(&c__1, "DORMLQ", " ", m, m, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } + minwrk = *m * (*m * 3 + 20); + } else { + +/* Path 2t (N at least M, but not much larger) */ + + maxwrk = (*m << 2) + (*m + *n) * ilaenv_(&c__1, "DGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wantu) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * ((*m << 1) + 5) + *m * + ilaenv_(&c__1, "DORMQR", " ", m, m, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } + if (wantvt) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * ((*m << 1) + 5) + *m * + ilaenv_(&c__1, "DORMLQ", " ", m, m, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } +/* Computing MAX */ + i__2 = *m * ((*m << 1) + 19), i__3 = (*m << 2) + *n; + minwrk = f2cmax(i__2,i__3); + } + } + } + maxwrk = f2cmax(maxwrk,minwrk); + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -19; + } + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("DGESVDX", &i__2, (ftnlen)7); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Set singular values indices accord to RANGE. */ + + if (alls) { + *(unsigned char *)rngtgk = 'I'; + iltgk = 1; + iutgk = f2cmin(*m,*n); + } else if (inds) { + *(unsigned char *)rngtgk = 'I'; + iltgk = *il; + iutgk = *iu; + } else { + *(unsigned char *)rngtgk = 'V'; + iltgk = 0; + iutgk = 0; + } + +/* Get machine constants */ + + eps = dlamch_("P"); + smlnum = sqrt(dlamch_("S")) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + } else if (anrm > bignum) { + iscl = 1; + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + } + + if (*m >= *n) { + +/* A has at least as many rows as columns. If A has sufficiently */ +/* more rows than columns, first reduce A using the QR */ +/* decomposition. */ + + if (*m >= mnthr) { + +/* Path 1 (M much larger than N): */ +/* A = Q * R = Q * ( QB * B * PB**T ) */ +/* = Q * ( QB * ( UB * S * VB**T ) * PB**T ) */ +/* U = Q * QB * UB; V**T = VB**T * PB**T */ + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + itau = 1; + itemp = itau + *n; + i__2 = *lwork - itemp + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[itemp], &i__2, + info); + +/* Copy R into WORK and bidiagonalize it: */ +/* (Workspace: need N*N+5*N, prefer N*N+4*N+2*N*NB) */ + + iqrf = itemp; + id = iqrf + *n * *n; + ie = id + *n; + itauq = ie + *n; + itaup = itauq + *n; + itemp = itaup + *n; + dlacpy_("U", n, n, &a[a_offset], lda, &work[iqrf], n); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_("L", &i__2, &i__3, &c_b109, &c_b109, &work[iqrf + 1], n); + i__2 = *lwork - itemp + 1; + dgebrd_(n, n, &work[iqrf], n, &work[id], &work[ie], &work[itauq], + &work[itaup], &work[itemp], &i__2, info); + +/* Solve eigenvalue problem TGK*Z=Z*S. */ +/* (Workspace: need 14*N + 2*N*(N+1)) */ + + itgkz = itemp; + itemp = itgkz + *n * ((*n << 1) + 1); + i__2 = *n << 1; + dbdsvdx_("U", jobz, rngtgk, n, &work[id], &work[ie], vl, vu, & + iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[ + itemp], &iwork[1], info); + +/* If needed, compute left singular vectors. */ + + if (wantu) { + j = itgkz; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + dcopy_(n, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1); + j += *n << 1; + } + i__2 = *m - *n; + dlaset_("A", &i__2, ns, &c_b109, &c_b109, &u[*n + 1 + u_dim1], + ldu); + +/* Call DORMBR to compute QB*UB. */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + dormbr_("Q", "L", "N", n, ns, n, &work[iqrf], n, &work[itauq], + &u[u_offset], ldu, &work[itemp], &i__2, info); + +/* Call DORMQR to compute Q*(QB*UB). */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + dormqr_("L", "N", m, ns, n, &a[a_offset], lda, &work[itau], & + u[u_offset], ldu, &work[itemp], &i__2, info); + } + +/* If needed, compute right singular vectors. */ + + if (wantvt) { + j = itgkz + *n; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + dcopy_(n, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt); + j += *n << 1; + } + +/* Call DORMBR to compute VB**T * PB**T */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + dormbr_("P", "R", "T", ns, n, n, &work[iqrf], n, &work[itaup], + &vt[vt_offset], ldvt, &work[itemp], &i__2, info); + } + } else { + +/* Path 2 (M at least N, but not much larger) */ +/* Reduce A to bidiagonal form without QR decomposition */ +/* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T */ +/* U = QB * UB; V**T = VB**T * PB**T */ + +/* Bidiagonalize A */ +/* (Workspace: need 4*N+M, prefer 4*N+(M+N)*NB) */ + + id = 1; + ie = id + *n; + itauq = ie + *n; + itaup = itauq + *n; + itemp = itaup + *n; + i__2 = *lwork - itemp + 1; + dgebrd_(m, n, &a[a_offset], lda, &work[id], &work[ie], &work[ + itauq], &work[itaup], &work[itemp], &i__2, info); + +/* Solve eigenvalue problem TGK*Z=Z*S. */ +/* (Workspace: need 14*N + 2*N*(N+1)) */ + + itgkz = itemp; + itemp = itgkz + *n * ((*n << 1) + 1); + i__2 = *n << 1; + dbdsvdx_("U", jobz, rngtgk, n, &work[id], &work[ie], vl, vu, & + iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[ + itemp], &iwork[1], info); + +/* If needed, compute left singular vectors. */ + + if (wantu) { + j = itgkz; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + dcopy_(n, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1); + j += *n << 1; + } + i__2 = *m - *n; + dlaset_("A", &i__2, ns, &c_b109, &c_b109, &u[*n + 1 + u_dim1], + ldu); + +/* Call DORMBR to compute QB*UB. */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + dormbr_("Q", "L", "N", m, ns, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[itemp], &i__2, &ierr); + } + +/* If needed, compute right singular vectors. */ + + if (wantvt) { + j = itgkz + *n; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + dcopy_(n, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt); + j += *n << 1; + } + +/* Call DORMBR to compute VB**T * PB**T */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + dormbr_("P", "R", "T", ns, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[itemp], &i__2, & + ierr); + } + } + } else { + +/* A has more columns than rows. If A has sufficiently more */ +/* columns than rows, first reduce A using the LQ decomposition. */ + + if (*n >= mnthr) { + +/* Path 1t (N much larger than M): */ +/* A = L * Q = ( QB * B * PB**T ) * Q */ +/* = ( QB * ( UB * S * VB**T ) * PB**T ) * Q */ +/* U = QB * UB ; V**T = VB**T * PB**T * Q */ + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + itau = 1; + itemp = itau + *m; + i__2 = *lwork - itemp + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[itemp], &i__2, + info); +/* Copy L into WORK and bidiagonalize it: */ +/* (Workspace in WORK( ITEMP ): need M*M+5*N, prefer M*M+4*M+2*M*NB) */ + + ilqf = itemp; + id = ilqf + *m * *m; + ie = id + *m; + itauq = ie + *m; + itaup = itauq + *m; + itemp = itaup + *m; + dlacpy_("L", m, m, &a[a_offset], lda, &work[ilqf], m); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_("U", &i__2, &i__3, &c_b109, &c_b109, &work[ilqf + *m], m); + i__2 = *lwork - itemp + 1; + dgebrd_(m, m, &work[ilqf], m, &work[id], &work[ie], &work[itauq], + &work[itaup], &work[itemp], &i__2, info); + +/* Solve eigenvalue problem TGK*Z=Z*S. */ +/* (Workspace: need 2*M*M+14*M) */ + + itgkz = itemp; + itemp = itgkz + *m * ((*m << 1) + 1); + i__2 = *m << 1; + dbdsvdx_("U", jobz, rngtgk, m, &work[id], &work[ie], vl, vu, & + iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[ + itemp], &iwork[1], info); + +/* If needed, compute left singular vectors. */ + + if (wantu) { + j = itgkz; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + dcopy_(m, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1); + j += *m << 1; + } + +/* Call DORMBR to compute QB*UB. */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + dormbr_("Q", "L", "N", m, ns, m, &work[ilqf], m, &work[itauq], + &u[u_offset], ldu, &work[itemp], &i__2, info); + } + +/* If needed, compute right singular vectors. */ + + if (wantvt) { + j = itgkz + *m; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + dcopy_(m, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt); + j += *m << 1; + } + i__2 = *n - *m; + dlaset_("A", ns, &i__2, &c_b109, &c_b109, &vt[(*m + 1) * + vt_dim1 + 1], ldvt); + +/* Call DORMBR to compute (VB**T)*(PB**T) */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + dormbr_("P", "R", "T", ns, m, m, &work[ilqf], m, &work[itaup], + &vt[vt_offset], ldvt, &work[itemp], &i__2, info); + +/* Call DORMLQ to compute ((VB**T)*(PB**T))*Q. */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + dormlq_("R", "N", ns, n, m, &a[a_offset], lda, &work[itau], & + vt[vt_offset], ldvt, &work[itemp], &i__2, info); + } + } else { + +/* Path 2t (N greater than M, but not much larger) */ +/* Reduce to bidiagonal form without LQ decomposition */ +/* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T */ +/* U = QB * UB; V**T = VB**T * PB**T */ + +/* Bidiagonalize A */ +/* (Workspace: need 4*M+N, prefer 4*M+(M+N)*NB) */ + + id = 1; + ie = id + *m; + itauq = ie + *m; + itaup = itauq + *m; + itemp = itaup + *m; + i__2 = *lwork - itemp + 1; + dgebrd_(m, n, &a[a_offset], lda, &work[id], &work[ie], &work[ + itauq], &work[itaup], &work[itemp], &i__2, info); + +/* Solve eigenvalue problem TGK*Z=Z*S. */ +/* (Workspace: need 2*M*M+14*M) */ + + itgkz = itemp; + itemp = itgkz + *m * ((*m << 1) + 1); + i__2 = *m << 1; + dbdsvdx_("L", jobz, rngtgk, m, &work[id], &work[ie], vl, vu, & + iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[ + itemp], &iwork[1], info); + +/* If needed, compute left singular vectors. */ + + if (wantu) { + j = itgkz; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + dcopy_(m, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1); + j += *m << 1; + } + +/* Call DORMBR to compute QB*UB. */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + dormbr_("Q", "L", "N", m, ns, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[itemp], &i__2, info); + } + +/* If needed, compute right singular vectors. */ + + if (wantvt) { + j = itgkz + *m; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + dcopy_(m, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt); + j += *m << 1; + } + i__2 = *n - *m; + dlaset_("A", ns, &i__2, &c_b109, &c_b109, &vt[(*m + 1) * + vt_dim1 + 1], ldvt); + +/* Call DORMBR to compute VB**T * PB**T */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + dormbr_("P", "R", "T", ns, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[itemp], &i__2, + info); + } + } + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (anrm < smlnum) { + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (doublereal) maxwrk; + + return 0; + +/* End of DGESVDX */ + +} /* dgesvdx_ */ + diff --git a/lapack-netlib/SRC/dgesvj.c b/lapack-netlib/SRC/dgesvj.c new file mode 100644 index 000000000..a49531426 --- /dev/null +++ b/lapack-netlib/SRC/dgesvj.c @@ -0,0 +1,2233 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGESVJ */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGESVJ + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, */ +/* LDV, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDV, LWORK, M, MV, N */ +/* CHARACTER*1 JOBA, JOBU, JOBV */ +/* DOUBLE PRECISION A( LDA, * ), SVA( N ), V( LDV, * ), */ +/* $ WORK( LWORK ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGESVJ computes the singular value decomposition (SVD) of a real */ +/* > M-by-N matrix A, where M >= N. The SVD of A is written as */ +/* > [++] [xx] [x0] [xx] */ +/* > A = U * SIGMA * V^t, [++] = [xx] * [ox] * [xx] */ +/* > [++] [xx] */ +/* > where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal */ +/* > matrix, and V is an N-by-N orthogonal matrix. The diagonal elements */ +/* > of SIGMA are the singular values of A. The columns of U and V are the */ +/* > left and the right singular vectors of A, respectively. */ +/* > DGESVJ can sometimes compute tiny singular values and their singular vectors much */ +/* > more accurately than other SVD routines, see below under Further Details. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBA */ +/* > \verbatim */ +/* > JOBA is CHARACTER*1 */ +/* > Specifies the structure of A. */ +/* > = 'L': The input matrix A is lower triangular; */ +/* > = 'U': The input matrix A is upper triangular; */ +/* > = 'G': The input matrix A is general M-by-N matrix, M >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > Specifies whether to compute the left singular vectors */ +/* > (columns of U): */ +/* > = 'U': The left singular vectors corresponding to the nonzero */ +/* > singular values are computed and returned in the leading */ +/* > columns of A. See more details in the description of A. */ +/* > The default numerical orthogonality threshold is set to */ +/* > approximately TOL=CTOL*EPS, CTOL=DSQRT(M), EPS=DLAMCH('E'). */ +/* > = 'C': Analogous to JOBU='U', except that user can control the */ +/* > level of numerical orthogonality of the computed left */ +/* > singular vectors. TOL can be set to TOL = CTOL*EPS, where */ +/* > CTOL is given on input in the array WORK. */ +/* > No CTOL smaller than ONE is allowed. CTOL greater */ +/* > than 1 / EPS is meaningless. The option 'C' */ +/* > can be used if M*EPS is satisfactory orthogonality */ +/* > of the computed left singular vectors, so CTOL=M could */ +/* > save few sweeps of Jacobi rotations. */ +/* > See the descriptions of A and WORK(1). */ +/* > = 'N': The matrix U is not computed. However, see the */ +/* > description of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > Specifies whether to compute the right singular vectors, that */ +/* > is, the matrix V: */ +/* > = 'V': the matrix V is computed and returned in the array V */ +/* > = 'A': the Jacobi rotations are applied to the MV-by-N */ +/* > array V. In other words, the right singular vector */ +/* > matrix V is not computed explicitly, instead it is */ +/* > applied to an MV-by-N matrix initially stored in the */ +/* > first MV rows of V. */ +/* > = 'N': the matrix V is not computed and the array V is not */ +/* > referenced */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. 1/DLAMCH('E') > M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. */ +/* > M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit : */ +/* > If JOBU = 'U' .OR. JOBU = 'C' : */ +/* > If INFO = 0 : */ +/* > RANKA orthonormal columns of U are returned in the */ +/* > leading RANKA columns of the array A. Here RANKA <= N */ +/* > is the number of computed singular values of A that are */ +/* > above the underflow threshold DLAMCH('S'). The singular */ +/* > vectors corresponding to underflowed or zero singular */ +/* > values are not computed. The value of RANKA is returned */ +/* > in the array WORK as RANKA=NINT(WORK(2)). Also see the */ +/* > descriptions of SVA and WORK. The computed columns of U */ +/* > are mutually numerically orthogonal up to approximately */ +/* > TOL=DSQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU = 'C'), */ +/* > see the description of JOBU. */ +/* > If INFO > 0 : */ +/* > the procedure DGESVJ did not converge in the given number */ +/* > of iterations (sweeps). In that case, the computed */ +/* > columns of U may not be orthogonal up to TOL. The output */ +/* > U (stored in A), SIGMA (given by the computed singular */ +/* > values in SVA(1:N)) and V is still a decomposition of the */ +/* > input matrix A in the sense that the residual */ +/* > ||A-SCALE*U*SIGMA*V^T||_2 / ||A||_2 is small. */ +/* > */ +/* > If JOBU = 'N' : */ +/* > If INFO = 0 : */ +/* > Note that the left singular vectors are 'for free' in the */ +/* > one-sided Jacobi SVD algorithm. However, if only the */ +/* > singular values are needed, the level of numerical */ +/* > orthogonality of U is not an issue and iterations are */ +/* > stopped when the columns of the iterated matrix are */ +/* > numerically orthogonal up to approximately M*EPS. Thus, */ +/* > on exit, A contains the columns of U scaled with the */ +/* > corresponding singular values. */ +/* > If INFO > 0 : */ +/* > the procedure DGESVJ did not converge in the given number */ +/* > of iterations (sweeps). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SVA */ +/* > \verbatim */ +/* > SVA is DOUBLE PRECISION array, dimension (N) */ +/* > On exit : */ +/* > If INFO = 0 : */ +/* > depending on the value SCALE = WORK(1), we have: */ +/* > If SCALE = ONE : */ +/* > SVA(1:N) contains the computed singular values of A. */ +/* > During the computation SVA contains the Euclidean column */ +/* > norms of the iterated matrices in the array A. */ +/* > If SCALE .NE. ONE : */ +/* > The singular values of A are SCALE*SVA(1:N), and this */ +/* > factored representation is due to the fact that some of the */ +/* > singular values of A might underflow or overflow. */ +/* > If INFO > 0 : */ +/* > the procedure DGESVJ did not converge in the given number of */ +/* > iterations (sweeps) and SCALE*SVA(1:N) may not be accurate. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MV */ +/* > \verbatim */ +/* > MV is INTEGER */ +/* > If JOBV = 'A', then the product of Jacobi rotations in DGESVJ */ +/* > is applied to the first MV rows of V. See the description of JOBV. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V */ +/* > \verbatim */ +/* > V is DOUBLE PRECISION array, dimension (LDV,N) */ +/* > If JOBV = 'V', then V contains on exit the N-by-N matrix of */ +/* > the right singular vectors; */ +/* > If JOBV = 'A', then V contains the product of the computed right */ +/* > singular vector matrix and the initial matrix in */ +/* > the array V. */ +/* > If JOBV = 'N', then V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V, LDV >= 1. */ +/* > If JOBV = 'V', then LDV >= f2cmax(1,N). */ +/* > If JOBV = 'A', then LDV >= f2cmax(1,MV) . */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (LWORK) */ +/* > On entry : */ +/* > If JOBU = 'C' : */ +/* > WORK(1) = CTOL, where CTOL defines the threshold for convergence. */ +/* > The process stops if all columns of A are mutually */ +/* > orthogonal up to CTOL*EPS, EPS=DLAMCH('E'). */ +/* > It is required that CTOL >= ONE, i.e. it is not */ +/* > allowed to force the routine to obtain orthogonality */ +/* > below EPS. */ +/* > On exit : */ +/* > WORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N) */ +/* > are the computed singular values of A. */ +/* > (See description of SVA().) */ +/* > WORK(2) = NINT(WORK(2)) is the number of the computed nonzero */ +/* > singular values. */ +/* > WORK(3) = NINT(WORK(3)) is the number of the computed singular */ +/* > values that are larger than the underflow threshold. */ +/* > WORK(4) = NINT(WORK(4)) is the number of sweeps of Jacobi */ +/* > rotations needed for numerical convergence. */ +/* > WORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep. */ +/* > This is useful information in cases when DGESVJ did */ +/* > not converge, as it can be used to estimate whether */ +/* > the output is still useful and for post festum analysis. */ +/* > WORK(6) = the largest absolute value over all sines of the */ +/* > Jacobi rotation angles in the last sweep. It can be */ +/* > useful for a post festum analysis. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > length of WORK, WORK >= MAX(6,M+N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, then the i-th argument had an illegal value */ +/* > > 0: DGESVJ did not converge in the maximal allowed number (30) */ +/* > of sweeps. The output may still be useful. See the */ +/* > description of WORK. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane */ +/* > rotations. The rotations are implemented as fast scaled rotations of */ +/* > Anda and Park [1]. In the case of underflow of the Jacobi angle, a */ +/* > modified Jacobi transformation of Drmac [4] is used. Pivot strategy uses */ +/* > column interchanges of de Rijk [2]. The relative accuracy of the computed */ +/* > singular values and the accuracy of the computed singular vectors (in */ +/* > angle metric) is as guaranteed by the theory of Demmel and Veselic [3]. */ +/* > The condition number that determines the accuracy in the full rank case */ +/* > is essentially min_{D=diag} kappa(A*D), where kappa(.) is the */ +/* > spectral condition number. The best performance of this Jacobi SVD */ +/* > procedure is achieved if used in an accelerated version of Drmac and */ +/* > Veselic [5,6], and it is the kernel routine in the SIGMA library [7]. */ +/* > Some tunning parameters (marked with [TP]) are available for the */ +/* > implementer. */ +/* > The computational range for the nonzero singular values is the machine */ +/* > number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even */ +/* > denormalized singular values can be computed with the corresponding */ +/* > gradual loss of accurate digits. */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ============ */ +/* > */ +/* > Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */ +/* > \endverbatim */ + +/* > \par References: */ +/* ================ */ +/* > */ +/* > \verbatim */ +/* > */ +/* > [1] A. A. Anda and H. Park: Fast plane rotations with dynamic scaling. */ +/* > SIAM J. matrix Anal. Appl., Vol. 15 (1994), pp. 162-174. */ +/* > [2] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the */ +/* > singular value decomposition on a vector computer. */ +/* > SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371. */ +/* > [3] J. Demmel and K. Veselic: Jacobi method is more accurate than QR. */ +/* > [4] Z. Drmac: Implementation of Jacobi rotations for accurate singular */ +/* > value computation in floating point arithmetic. */ +/* > SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222. */ +/* > [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. */ +/* > SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. */ +/* > LAPACK Working note 169. */ +/* > [6] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. */ +/* > SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. */ +/* > LAPACK Working note 170. */ +/* > [7] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, */ +/* > QSVD, (H,K)-SVD computations. */ +/* > Department of Mathematics, University of Zagreb, 2008. */ +/* > \endverbatim */ + +/* > \par Bugs, examples and comments: */ +/* ================================= */ +/* > */ +/* > \verbatim */ +/* > =========================== */ +/* > Please report all bugs and send interesting test examples and comments to */ +/* > drmac@math.hr. Thank you. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgesvj_(char *joba, char *jobu, char *jobv, integer *m, + integer *n, doublereal *a, integer *lda, doublereal *sva, integer *mv, + doublereal *v, integer *ldv, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2; + + /* Local variables */ + doublereal aapp, aapq, aaqq; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal ctol; + integer ierr; + doublereal bigtheta; + integer pskipped; + doublereal aapp0; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + doublereal temp1; + integer i__, p, q; + doublereal t; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal large, apoaq, aqoap; + extern logical lsame_(char *, char *); + doublereal theta, small, sfmin; + logical lsvec; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal fastr[5]; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal epsln; + logical applv, rsvec; + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + logical uctol; + extern /* Subroutine */ int drotm_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *); + logical lower, upper, rotok; + integer n2, n4; + extern /* Subroutine */ int dgsvj0_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), dgsvj1_( + char *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *); + doublereal rootsfmin; + integer n34; + doublereal cs; + extern doublereal dlamch_(char *); + doublereal sn; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *, ftnlen); + integer ijblsk, swband, blskip; + doublereal mxaapq; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + doublereal thsign, mxsinj; + integer ir1, emptsw, notrot, iswrot, jbc; + doublereal big; + integer kbl, lkahead, igl, ibr, jgl, nbl; + doublereal skl; + logical goscale; + doublereal tol; + integer mvl; + logical noscale; + doublereal rootbig, rooteps; + integer rowskip; + doublereal roottol; + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + +/* from BLAS */ +/* from LAPACK */ +/* from BLAS */ +/* from LAPACK */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + --sva; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + --work; + + /* Function Body */ + lsvec = lsame_(jobu, "U"); + uctol = lsame_(jobu, "C"); + rsvec = lsame_(jobv, "V"); + applv = lsame_(jobv, "A"); + upper = lsame_(joba, "U"); + lower = lsame_(joba, "L"); + + if (! (upper || lower || lsame_(joba, "G"))) { + *info = -1; + } else if (! (lsvec || uctol || lsame_(jobu, "N"))) + { + *info = -2; + } else if (! (rsvec || applv || lsame_(jobv, "N"))) + { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0 || *n > *m) { + *info = -5; + } else if (*lda < *m) { + *info = -7; + } else if (*mv < 0) { + *info = -9; + } else if (rsvec && *ldv < *n || applv && *ldv < *mv) { + *info = -11; + } else if (uctol && work[1] <= 1.) { + *info = -12; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *m + *n; + if (*lwork < f2cmax(i__1,6)) { + *info = -13; + } else { + *info = 0; + } + } + +/* #:( */ + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGESVJ", &i__1, (ftnlen)6); + return 0; + } + +/* #:) Quick return for void matrix */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Set numerical parameters */ +/* The stopping criterion for Jacobi rotations is */ + +/* max_{i<>j}|A(:,i)^T * A(:,j)|/(||A(:,i)||*||A(:,j)||) < CTOL*EPS */ + +/* where EPS is the round-off and CTOL is defined as follows: */ + + if (uctol) { +/* ... user controlled */ + ctol = work[1]; + } else { +/* ... default */ + if (lsvec || rsvec || applv) { + ctol = sqrt((doublereal) (*m)); + } else { + ctol = (doublereal) (*m); + } + } +/* ... and the machine dependent parameters are */ +/* [!] (Make sure that DLAMCH() works properly on the target machine.) */ + + epsln = dlamch_("Epsilon"); + rooteps = sqrt(epsln); + sfmin = dlamch_("SafeMinimum"); + rootsfmin = sqrt(sfmin); + small = sfmin / epsln; + big = dlamch_("Overflow"); +/* BIG = ONE / SFMIN */ + rootbig = 1. / rootsfmin; + large = big / sqrt((doublereal) (*m * *n)); + bigtheta = 1. / rooteps; + + tol = ctol * epsln; + roottol = sqrt(tol); + + if ((doublereal) (*m) * epsln >= 1.) { + *info = -4; + i__1 = -(*info); + xerbla_("DGESVJ", &i__1, (ftnlen)6); + return 0; + } + +/* Initialize the right singular vector matrix. */ + + if (rsvec) { + mvl = *n; + dlaset_("A", &mvl, n, &c_b17, &c_b18, &v[v_offset], ldv); + } else if (applv) { + mvl = *mv; + } + rsvec = rsvec || applv; + +/* Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N ) */ +/* (!) If necessary, scale A to protect the largest singular value */ +/* from overflow. It is possible that saving the largest singular */ +/* value destroys the information about the small ones. */ +/* This initial scaling is almost minimal in the sense that the */ +/* goal is to make sure that no column norm overflows, and that */ +/* DSQRT(N)*max_i SVA(i) does not overflow. If INFinite entries */ +/* in A are detected, the procedure returns with INFO=-6. */ + + skl = 1. / sqrt((doublereal) (*m) * (doublereal) (*n)); + noscale = TRUE_; + goscale = TRUE_; + + if (lower) { +/* the input matrix is M-by-N lower triangular (trapezoidal) */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.; + aaqq = 1.; + i__2 = *m - p + 1; + dlassq_(&i__2, &a[p + p * a_dim1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -6; + i__2 = -(*info); + xerbla_("DGESVJ", &i__2, (ftnlen)6); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscale) { + sva[p] = aapp * aaqq; + } else { + noscale = FALSE_; + sva[p] = aapp * (aaqq * skl); + if (goscale) { + goscale = FALSE_; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + sva[q] *= skl; +/* L1873: */ + } + } + } +/* L1874: */ + } + } else if (upper) { +/* the input matrix is M-by-N upper triangular (trapezoidal) */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.; + aaqq = 1.; + dlassq_(&p, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -6; + i__2 = -(*info); + xerbla_("DGESVJ", &i__2, (ftnlen)6); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscale) { + sva[p] = aapp * aaqq; + } else { + noscale = FALSE_; + sva[p] = aapp * (aaqq * skl); + if (goscale) { + goscale = FALSE_; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + sva[q] *= skl; +/* L2873: */ + } + } + } +/* L2874: */ + } + } else { +/* the input matrix is M-by-N general dense */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.; + aaqq = 1.; + dlassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -6; + i__2 = -(*info); + xerbla_("DGESVJ", &i__2, (ftnlen)6); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscale) { + sva[p] = aapp * aaqq; + } else { + noscale = FALSE_; + sva[p] = aapp * (aaqq * skl); + if (goscale) { + goscale = FALSE_; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + sva[q] *= skl; +/* L3873: */ + } + } + } +/* L3874: */ + } + } + + if (noscale) { + skl = 1.; + } + +/* Move the smaller part of the spectrum from the underflow threshold */ +/* (!) Start by determining the position of the nonzero entries of the */ +/* array SVA() relative to ( SFMIN, BIG ). */ + + aapp = 0.; + aaqq = big; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + if (sva[p] != 0.) { +/* Computing MIN */ + d__1 = aaqq, d__2 = sva[p]; + aaqq = f2cmin(d__1,d__2); + } +/* Computing MAX */ + d__1 = aapp, d__2 = sva[p]; + aapp = f2cmax(d__1,d__2); +/* L4781: */ + } + +/* #:) Quick return for zero matrix */ + + if (aapp == 0.) { + if (lsvec) { + dlaset_("G", m, n, &c_b17, &c_b18, &a[a_offset], lda); + } + work[1] = 1.; + work[2] = 0.; + work[3] = 0.; + work[4] = 0.; + work[5] = 0.; + work[6] = 0.; + return 0; + } + +/* #:) Quick return for one-column matrix */ + + if (*n == 1) { + if (lsvec) { + dlascl_("G", &c__0, &c__0, &sva[1], &skl, m, &c__1, &a[a_dim1 + 1] + , lda, &ierr); + } + work[1] = 1. / skl; + if (sva[1] >= sfmin) { + work[2] = 1.; + } else { + work[2] = 0.; + } + work[3] = 0.; + work[4] = 0.; + work[5] = 0.; + work[6] = 0.; + return 0; + } + +/* Protect small singular values from underflow, and try to */ +/* avoid underflows/overflows in computing Jacobi rotations. */ + + sn = sqrt(sfmin / epsln); + temp1 = sqrt(big / (doublereal) (*n)); + if (aapp <= sn || aaqq >= temp1 || sn <= aaqq && aapp <= temp1) { +/* Computing MIN */ + d__1 = big, d__2 = temp1 / aapp; + temp1 = f2cmin(d__1,d__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else if (aaqq <= sn && aapp <= temp1) { +/* Computing MIN */ + d__1 = sn / aaqq, d__2 = big / (aapp * sqrt((doublereal) (*n))); + temp1 = f2cmin(d__1,d__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else if (aaqq >= sn && aapp >= temp1) { +/* Computing MAX */ + d__1 = sn / aaqq, d__2 = temp1 / aapp; + temp1 = f2cmax(d__1,d__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else if (aaqq <= sn && aapp >= temp1) { +/* Computing MIN */ + d__1 = sn / aaqq, d__2 = big / (sqrt((doublereal) (*n)) * aapp); + temp1 = f2cmin(d__1,d__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else { + temp1 = 1.; + } + +/* Scale, if necessary */ + + if (temp1 != 1.) { + dlascl_("G", &c__0, &c__0, &c_b18, &temp1, n, &c__1, &sva[1], n, & + ierr); + } + skl = temp1 * skl; + if (skl != 1.) { + dlascl_(joba, &c__0, &c__0, &c_b18, &skl, m, n, &a[a_offset], lda, & + ierr); + skl = 1. / skl; + } + +/* Row-cyclic Jacobi SVD algorithm with column pivoting */ + + emptsw = *n * (*n - 1) / 2; + notrot = 0; + fastr[0] = 0.; + +/* A is represented in factored form A = A * diag(WORK), where diag(WORK) */ +/* is initialized to identity. WORK is updated during fast scaled */ +/* rotations. */ + + i__1 = *n; + for (q = 1; q <= i__1; ++q) { + work[q] = 1.; +/* L1868: */ + } + + + swband = 3; +/* [TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective */ +/* if DGESVJ is used as a computational routine in the preconditioned */ +/* Jacobi SVD algorithm DGESVJ. For sweeps i=1:SWBAND the procedure */ +/* works on pivots inside a band-like region around the diagonal. */ +/* The boundaries are determined dynamically, based on the number of */ +/* pivots above a threshold. */ + + kbl = f2cmin(8,*n); +/* [TP] KBL is a tuning parameter that defines the tile size in the */ +/* tiling of the p-q loops of pivot pairs. In general, an optimal */ +/* value of KBL depends on the matrix dimensions and on the */ +/* parameters of the computer's memory. */ + + nbl = *n / kbl; + if (nbl * kbl != *n) { + ++nbl; + } + +/* Computing 2nd power */ + i__1 = kbl; + blskip = i__1 * i__1; +/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ + + rowskip = f2cmin(5,kbl); +/* [TP] ROWSKIP is a tuning parameter. */ + + lkahead = 1; +/* [TP] LKAHEAD is a tuning parameter. */ + +/* Quasi block transformations, using the lower (upper) triangular */ +/* structure of the input matrix. The quasi-block-cycling usually */ +/* invokes cubic convergence. Big part of this cycle is done inside */ +/* canonical subspaces of dimensions less than M. */ + +/* Computing MAX */ + i__1 = 64, i__2 = kbl << 2; + if ((lower || upper) && *n > f2cmax(i__1,i__2)) { +/* [TP] The number of partition levels and the actual partition are */ +/* tuning parameters. */ + n4 = *n / 4; + n2 = *n / 2; + n34 = n4 * 3; + if (applv) { + q = 0; + } else { + q = 1; + } + + if (lower) { + +/* This works very well on lower triangular matrices, in particular */ +/* in the framework of the preconditioned Jacobi SVD (xGEJSV). */ +/* The idea is simple: */ +/* [+ 0 0 0] Note that Jacobi transformations of [0 0] */ +/* [+ + 0 0] [0 0] */ +/* [+ + x 0] actually work on [x 0] [x 0] */ +/* [+ + x x] [x x]. [x x] */ + + i__1 = *m - n34; + i__2 = *n - n34; + i__3 = *lwork - *n; + dgsvj0_(jobv, &i__1, &i__2, &a[n34 + 1 + (n34 + 1) * a_dim1], lda, + &work[n34 + 1], &sva[n34 + 1], &mvl, &v[n34 * q + 1 + ( + n34 + 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__2, & + work[*n + 1], &i__3, &ierr); + + i__1 = *m - n2; + i__2 = n34 - n2; + i__3 = *lwork - *n; + dgsvj0_(jobv, &i__1, &i__2, &a[n2 + 1 + (n2 + 1) * a_dim1], lda, & + work[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1) + * v_dim1], ldv, &epsln, &sfmin, &tol, &c__2, &work[*n + + 1], &i__3, &ierr); + + i__1 = *m - n2; + i__2 = *n - n2; + i__3 = *lwork - *n; + dgsvj1_(jobv, &i__1, &i__2, &n4, &a[n2 + 1 + (n2 + 1) * a_dim1], + lda, &work[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + ( + n2 + 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, & + work[*n + 1], &i__3, &ierr); + + i__1 = *m - n4; + i__2 = n2 - n4; + i__3 = *lwork - *n; + dgsvj0_(jobv, &i__1, &i__2, &a[n4 + 1 + (n4 + 1) * a_dim1], lda, & + work[n4 + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1) + * v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n + + 1], &i__3, &ierr); + + i__1 = *lwork - *n; + dgsvj0_(jobv, m, &n4, &a[a_offset], lda, &work[1], &sva[1], &mvl, + &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n + + 1], &i__1, &ierr); + + i__1 = *lwork - *n; + dgsvj1_(jobv, m, &n2, &n4, &a[a_offset], lda, &work[1], &sva[1], & + mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, & + work[*n + 1], &i__1, &ierr); + + + } else if (upper) { + + + i__1 = *lwork - *n; + dgsvj0_(jobv, &n4, &n4, &a[a_offset], lda, &work[1], &sva[1], & + mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__2, & + work[*n + 1], &i__1, &ierr); + + i__1 = *lwork - *n; + dgsvj0_(jobv, &n2, &n4, &a[(n4 + 1) * a_dim1 + 1], lda, &work[n4 + + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1) * + v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n + 1], + &i__1, &ierr); + + i__1 = *lwork - *n; + dgsvj1_(jobv, &n2, &n2, &n4, &a[a_offset], lda, &work[1], &sva[1], + &mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, & + work[*n + 1], &i__1, &ierr); + + i__1 = n2 + n4; + i__2 = *lwork - *n; + dgsvj0_(jobv, &i__1, &n4, &a[(n2 + 1) * a_dim1 + 1], lda, &work[ + n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1) * + v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n + 1], + &i__2, &ierr); + } + + } + + + for (i__ = 1; i__ <= 30; ++i__) { + + + mxaapq = 0.; + mxsinj = 0.; + iswrot = 0; + + notrot = 0; + pskipped = 0; + +/* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs */ +/* 1 <= p < q <= N. This is the first step toward a blocked implementation */ +/* of the rotations. New implementation, based on block transformations, */ +/* is under development. */ + + i__1 = nbl; + for (ibr = 1; ibr <= i__1; ++ibr) { + + igl = (ibr - 1) * kbl + 1; + +/* Computing MIN */ + i__3 = lkahead, i__4 = nbl - ibr; + i__2 = f2cmin(i__3,i__4); + for (ir1 = 0; ir1 <= i__2; ++ir1) { + + igl += ir1 * kbl; + +/* Computing MIN */ + i__4 = igl + kbl - 1, i__5 = *n - 1; + i__3 = f2cmin(i__4,i__5); + for (p = igl; p <= i__3; ++p) { + + + i__4 = *n - p + 1; + q = idamax_(&i__4, &sva[p], &c__1) + p - 1; + if (p != q) { + dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + + 1], &c__1); + if (rsvec) { + dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1); + } + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + temp1 = work[p]; + work[p] = work[q]; + work[q] = temp1; + } + + if (ir1 == 0) { + +/* Column norms are periodically updated by explicit */ +/* norm computation. */ +/* Caveat: */ +/* Unfortunately, some BLAS implementations compute DNRM2(M,A(1,p),1) */ +/* as DSQRT(DDOT(M,A(1,p),1,A(1,p),1)), which may cause the result to */ +/* overflow for ||A(:,p)||_2 > DSQRT(overflow_threshold), and to */ +/* underflow for ||A(:,p)||_2 < DSQRT(underflow_threshold). */ +/* Hence, DNRM2 cannot be trusted, not even in the case when */ +/* the true norm is far from the under(over)flow boundaries. */ +/* If properly implemented DNRM2 is available, the IF-THEN-ELSE */ +/* below should read "AAPP = DNRM2( M, A(1,p), 1 ) * WORK(p)". */ + + if (sva[p] < rootbig && sva[p] > rootsfmin) { + sva[p] = dnrm2_(m, &a[p * a_dim1 + 1], &c__1) * + work[p]; + } else { + temp1 = 0.; + aapp = 1.; + dlassq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, & + aapp); + sva[p] = temp1 * sqrt(aapp) * work[p]; + } + aapp = sva[p]; + } else { + aapp = sva[p]; + } + + if (aapp > 0.) { + + pskipped = 0; + +/* Computing MIN */ + i__5 = igl + kbl - 1; + i__4 = f2cmin(i__5,*n); + for (q = p + 1; q <= i__4; ++q) { + + aaqq = sva[q]; + + if (aaqq > 0.) { + + aapp0 = aapp; + if (aaqq >= 1.) { + rotok = small * aapp <= aaqq; + if (aapp < big / aaqq) { + aapq = ddot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * work[p] * work[q] / + aaqq / aapp; + } else { + dcopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + dlascl_("G", &c__0, &c__0, &aapp, & + work[p], m, &c__1, &work[*n + + 1], lda, &ierr); + aapq = ddot_(m, &work[*n + 1], &c__1, + &a[q * a_dim1 + 1], &c__1) * + work[q] / aaqq; + } + } else { + rotok = aapp <= aaqq / small; + if (aapp > small / aaqq) { + aapq = ddot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * work[p] * work[q] / + aaqq / aapp; + } else { + dcopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + dlascl_("G", &c__0, &c__0, &aaqq, & + work[q], m, &c__1, &work[*n + + 1], lda, &ierr); + aapq = ddot_(m, &work[*n + 1], &c__1, + &a[p * a_dim1 + 1], &c__1) * + work[p] / aapp; + } + } + +/* Computing MAX */ + d__1 = mxaapq, d__2 = abs(aapq); + mxaapq = f2cmax(d__1,d__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq) > tol) { + +/* [RTD] ROTATED = ROTATED + ONE */ + + if (ir1 == 0) { + notrot = 0; + pskipped = 0; + ++iswrot; + } + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (d__1 = aqoap - apoaq, abs( + d__1)) * -.5 / aapq; + + if (abs(theta) > bigtheta) { + + t = .5 / theta; + fastr[2] = t * work[p] / work[q]; + fastr[3] = -t * work[q] / work[p]; + drotm_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, fastr); + if (rsvec) { + drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, fastr); + } +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq + 1.; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq; + aapp *= sqrt((f2cmax(d__1,d__2))); +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(t); + mxsinj = f2cmax(d__1,d__2); + + } else { + + + thsign = -d_sign(&c_b18, &aapq); + t = 1. / (theta + thsign * sqrt( + theta * theta + 1.)); + cs = sqrt(1. / (t * t + 1.)); + sn = t * cs; + +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(sn); + mxsinj = f2cmax(d__1,d__2); +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq + 1.; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq; + aapp *= sqrt((f2cmax(d__1,d__2))); + + apoaq = work[p] / work[q]; + aqoap = work[q] / work[p]; + if (work[p] >= 1.) { + if (work[q] >= 1.) { + fastr[2] = t * apoaq; + fastr[3] = -t * aqoap; + work[p] *= cs; + work[q] *= cs; + drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * + a_dim1 + 1], &c__1, fastr); + if (rsvec) { + drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ + q * v_dim1 + 1], &c__1, fastr); + } + } else { + d__1 = -t * aqoap; + daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + work[p] *= cs; + work[q] /= cs; + if (rsvec) { + d__1 = -t * aqoap; + daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + } + } + } else { + if (work[q] >= 1.) { + d__1 = t * apoaq; + daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + work[p] /= cs; + work[q] *= cs; + if (rsvec) { + d__1 = t * apoaq; + daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + } + } else { + if (work[p] >= work[q]) { + d__1 = -t * aqoap; + daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + work[p] *= cs; + work[q] /= cs; + if (rsvec) { + d__1 = -t * aqoap; + daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + d__1 = cs * sn * apoaq; + daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } else { + d__1 = t * apoaq; + daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + work[p] /= cs; + work[q] *= cs; + if (rsvec) { + d__1 = t * apoaq; + daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + d__1 = -cs * sn * aqoap; + daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + } + } + } + } + } + + } else { + dcopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + dlascl_("G", &c__0, &c__0, &aapp, & + c_b18, m, &c__1, &work[*n + 1] + , lda, &ierr); + dlascl_("G", &c__0, &c__0, &aaqq, & + c_b18, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * work[p] / work[q]; + daxpy_(m, &temp1, &work[*n + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + dlascl_("G", &c__0, &c__0, &c_b18, & + aaqq, m, &c__1, &a[q * a_dim1 + + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq * aapq; + sva[q] = aaqq * sqrt((f2cmax(d__1,d__2))) + ; + mxsinj = f2cmax(mxsinj,sfmin); + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q), SVA(p) */ +/* recompute SVA(q), SVA(p). */ + +/* Computing 2nd power */ + d__1 = sva[q] / aaqq; + if (d__1 * d__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = dnrm2_(m, &a[q * a_dim1 + + 1], &c__1) * work[q]; + } else { + t = 0.; + aaqq = 1.; + dlassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq) * work[q]; + } + } + if (aapp / aapp0 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = dnrm2_(m, &a[p * a_dim1 + + 1], &c__1) * work[p]; + } else { + t = 0.; + aapp = 1.; + dlassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp) * work[p]; + } + sva[p] = aapp; + } + + } else { +/* A(:,p) and A(:,q) already numerically orthogonal */ + if (ir1 == 0) { + ++notrot; + } +/* [RTD] SKIPPED = SKIPPED + 1 */ + ++pskipped; + } + } else { +/* A(:,q) is zero column */ + if (ir1 == 0) { + ++notrot; + } + ++pskipped; + } + + if (i__ <= swband && pskipped > rowskip) { + if (ir1 == 0) { + aapp = -aapp; + } + notrot = 0; + goto L2103; + } + +/* L2002: */ + } +/* END q-LOOP */ + +L2103: +/* bailed out of q-loop */ + + sva[p] = aapp; + + } else { + sva[p] = aapp; + if (ir1 == 0 && aapp == 0.) { +/* Computing MIN */ + i__4 = igl + kbl - 1; + notrot = notrot + f2cmin(i__4,*n) - p; + } + } + +/* L2001: */ + } +/* end of the p-loop */ +/* end of doing the block ( ibr, ibr ) */ +/* L1002: */ + } +/* end of ir1-loop */ + +/* ... go to the off diagonal blocks */ + + igl = (ibr - 1) * kbl + 1; + + i__2 = nbl; + for (jbc = ibr + 1; jbc <= i__2; ++jbc) { + + jgl = (jbc - 1) * kbl + 1; + +/* doing the block at ( ibr, jbc ) */ + + ijblsk = 0; +/* Computing MIN */ + i__4 = igl + kbl - 1; + i__3 = f2cmin(i__4,*n); + for (p = igl; p <= i__3; ++p) { + + aapp = sva[p]; + if (aapp > 0.) { + + pskipped = 0; + +/* Computing MIN */ + i__5 = jgl + kbl - 1; + i__4 = f2cmin(i__5,*n); + for (q = jgl; q <= i__4; ++q) { + + aaqq = sva[q]; + if (aaqq > 0.) { + aapp0 = aapp; + + +/* Safe Gram matrix computation */ + + if (aaqq >= 1.) { + if (aapp >= aaqq) { + rotok = small * aapp <= aaqq; + } else { + rotok = small * aaqq <= aapp; + } + if (aapp < big / aaqq) { + aapq = ddot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * work[p] * work[q] / + aaqq / aapp; + } else { + dcopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + dlascl_("G", &c__0, &c__0, &aapp, & + work[p], m, &c__1, &work[*n + + 1], lda, &ierr); + aapq = ddot_(m, &work[*n + 1], &c__1, + &a[q * a_dim1 + 1], &c__1) * + work[q] / aaqq; + } + } else { + if (aapp >= aaqq) { + rotok = aapp <= aaqq / small; + } else { + rotok = aaqq <= aapp / small; + } + if (aapp > small / aaqq) { + aapq = ddot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * work[p] * work[q] / + aaqq / aapp; + } else { + dcopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + dlascl_("G", &c__0, &c__0, &aaqq, & + work[q], m, &c__1, &work[*n + + 1], lda, &ierr); + aapq = ddot_(m, &work[*n + 1], &c__1, + &a[p * a_dim1 + 1], &c__1) * + work[p] / aapp; + } + } + +/* Computing MAX */ + d__1 = mxaapq, d__2 = abs(aapq); + mxaapq = f2cmax(d__1,d__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq) > tol) { + notrot = 0; +/* [RTD] ROTATED = ROTATED + 1 */ + pskipped = 0; + ++iswrot; + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (d__1 = aqoap - apoaq, abs( + d__1)) * -.5 / aapq; + if (aaqq > aapp0) { + theta = -theta; + } + + if (abs(theta) > bigtheta) { + t = .5 / theta; + fastr[2] = t * work[p] / work[q]; + fastr[3] = -t * work[q] / work[p]; + drotm_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, fastr); + if (rsvec) { + drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, fastr); + } +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq + 1.; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq; + aapp *= sqrt((f2cmax(d__1,d__2))); +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(t); + mxsinj = f2cmax(d__1,d__2); + } else { + + + thsign = -d_sign(&c_b18, &aapq); + if (aaqq > aapp0) { + thsign = -thsign; + } + t = 1. / (theta + thsign * sqrt( + theta * theta + 1.)); + cs = sqrt(1. / (t * t + 1.)); + sn = t * cs; +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(sn); + mxsinj = f2cmax(d__1,d__2); +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq + 1.; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq; + aapp *= sqrt((f2cmax(d__1,d__2))); + + apoaq = work[p] / work[q]; + aqoap = work[q] / work[p]; + if (work[p] >= 1.) { + + if (work[q] >= 1.) { + fastr[2] = t * apoaq; + fastr[3] = -t * aqoap; + work[p] *= cs; + work[q] *= cs; + drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * + a_dim1 + 1], &c__1, fastr); + if (rsvec) { + drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ + q * v_dim1 + 1], &c__1, fastr); + } + } else { + d__1 = -t * aqoap; + daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + if (rsvec) { + d__1 = -t * aqoap; + daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + } + work[p] *= cs; + work[q] /= cs; + } + } else { + if (work[q] >= 1.) { + d__1 = t * apoaq; + daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + if (rsvec) { + d__1 = t * apoaq; + daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + } + work[p] /= cs; + work[q] *= cs; + } else { + if (work[p] >= work[q]) { + d__1 = -t * aqoap; + daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + d__1 = cs * sn * apoaq; + daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + work[p] *= cs; + work[q] /= cs; + if (rsvec) { + d__1 = -t * aqoap; + daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + d__1 = cs * sn * apoaq; + daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } else { + d__1 = t * apoaq; + daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + d__1 = -cs * sn * aqoap; + daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + work[p] /= cs; + work[q] *= cs; + if (rsvec) { + d__1 = t * apoaq; + daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + d__1 = -cs * sn * aqoap; + daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + } + } + } + } + } + + } else { + if (aapp > aaqq) { + dcopy_(m, &a[p * a_dim1 + 1], & + c__1, &work[*n + 1], & + c__1); + dlascl_("G", &c__0, &c__0, &aapp, + &c_b18, m, &c__1, &work[* + n + 1], lda, &ierr); + dlascl_("G", &c__0, &c__0, &aaqq, + &c_b18, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * work[p] / work[q]; + daxpy_(m, &temp1, &work[*n + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1); + dlascl_("G", &c__0, &c__0, &c_b18, + &aaqq, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq * + aapq; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); + mxsinj = f2cmax(mxsinj,sfmin); + } else { + dcopy_(m, &a[q * a_dim1 + 1], & + c__1, &work[*n + 1], & + c__1); + dlascl_("G", &c__0, &c__0, &aaqq, + &c_b18, m, &c__1, &work[* + n + 1], lda, &ierr); + dlascl_("G", &c__0, &c__0, &aapp, + &c_b18, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * work[q] / work[p]; + daxpy_(m, &temp1, &work[*n + 1], & + c__1, &a[p * a_dim1 + 1], + &c__1); + dlascl_("G", &c__0, &c__0, &c_b18, + &aapp, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq * + aapq; + sva[p] = aapp * sqrt((f2cmax(d__1, + d__2))); + mxsinj = f2cmax(mxsinj,sfmin); + } + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q) */ +/* Computing 2nd power */ + d__1 = sva[q] / aaqq; + if (d__1 * d__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = dnrm2_(m, &a[q * a_dim1 + + 1], &c__1) * work[q]; + } else { + t = 0.; + aaqq = 1.; + dlassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq) * work[q]; + } + } +/* Computing 2nd power */ + d__1 = aapp / aapp0; + if (d__1 * d__1 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = dnrm2_(m, &a[p * a_dim1 + + 1], &c__1) * work[p]; + } else { + t = 0.; + aapp = 1.; + dlassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp) * work[p]; + } + sva[p] = aapp; + } +/* end of OK rotation */ + } else { + ++notrot; +/* [RTD] SKIPPED = SKIPPED + 1 */ + ++pskipped; + ++ijblsk; + } + } else { + ++notrot; + ++pskipped; + ++ijblsk; + } + + if (i__ <= swband && ijblsk >= blskip) { + sva[p] = aapp; + notrot = 0; + goto L2011; + } + if (i__ <= swband && pskipped > rowskip) { + aapp = -aapp; + notrot = 0; + goto L2203; + } + +/* L2200: */ + } +/* end of the q-loop */ +L2203: + + sva[p] = aapp; + + } else { + + if (aapp == 0.) { +/* Computing MIN */ + i__4 = jgl + kbl - 1; + notrot = notrot + f2cmin(i__4,*n) - jgl + 1; + } + if (aapp < 0.) { + notrot = 0; + } + + } + +/* L2100: */ + } +/* end of the p-loop */ +/* L2010: */ + } +/* end of the jbc-loop */ +L2011: +/* 2011 bailed out of the jbc-loop */ +/* Computing MIN */ + i__3 = igl + kbl - 1; + i__2 = f2cmin(i__3,*n); + for (p = igl; p <= i__2; ++p) { + sva[p] = (d__1 = sva[p], abs(d__1)); +/* L2012: */ + } +/* ** */ +/* L2000: */ + } +/* 2000 :: end of the ibr-loop */ + + if (sva[*n] < rootbig && sva[*n] > rootsfmin) { + sva[*n] = dnrm2_(m, &a[*n * a_dim1 + 1], &c__1) * work[*n]; + } else { + t = 0.; + aapp = 1.; + dlassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); + sva[*n] = t * sqrt(aapp) * work[*n]; + } + +/* Additional steering devices */ + + if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { + swband = i__; + } + + if (i__ > swband + 1 && mxaapq < sqrt((doublereal) (*n)) * tol && ( + doublereal) (*n) * mxaapq * mxsinj < tol) { + goto L1994; + } + + if (notrot >= emptsw) { + goto L1994; + } + +/* L1993: */ + } +/* end i=1:NSWEEP loop */ + +/* #:( Reaching this point means that the procedure has not converged. */ + *info = 29; + goto L1995; + +L1994: +/* #:) Reaching this point means numerical convergence after the i-th */ +/* sweep. */ + + *info = 0; +/* #:) INFO = 0 confirms successful iterations. */ +L1995: + +/* Sort the singular values and find how many are above */ +/* the underflow threshold. */ + + n2 = 0; + n4 = 0; + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + q = idamax_(&i__2, &sva[p], &c__1) + p - 1; + if (p != q) { + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + temp1 = work[p]; + work[p] = work[q]; + work[q] = temp1; + dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); + if (rsvec) { + dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } + if (sva[p] != 0.) { + ++n4; + if (sva[p] * skl > sfmin) { + ++n2; + } + } +/* L5991: */ + } + if (sva[*n] != 0.) { + ++n4; + if (sva[*n] * skl > sfmin) { + ++n2; + } + } + +/* Normalize the left singular vectors. */ + + if (lsvec || uctol) { + i__1 = n2; + for (p = 1; p <= i__1; ++p) { + d__1 = work[p] / sva[p]; + dscal_(m, &d__1, &a[p * a_dim1 + 1], &c__1); +/* L1998: */ + } + } + +/* Scale the product of Jacobi rotations (assemble the fast rotations). */ + + if (rsvec) { + if (applv) { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + dscal_(&mvl, &work[p], &v[p * v_dim1 + 1], &c__1); +/* L2398: */ + } + } else { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + temp1 = 1. / dnrm2_(&mvl, &v[p * v_dim1 + 1], &c__1); + dscal_(&mvl, &temp1, &v[p * v_dim1 + 1], &c__1); +/* L2399: */ + } + } + } + +/* Undo scaling, if necessary (and possible). */ + if (skl > 1. && sva[1] < big / skl || skl < 1. && sva[f2cmax(n2,1)] > sfmin / + skl) { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + sva[p] = skl * sva[p]; +/* L2400: */ + } + skl = 1.; + } + + work[1] = skl; +/* The singular values of A are SKL*SVA(1:N). If SKL.NE.ONE */ +/* then some of the singular values may overflow or underflow and */ +/* the spectrum is given in this factored representation. */ + + work[2] = (doublereal) n4; +/* N4 is the number of computed nonzero singular values of A. */ + + work[3] = (doublereal) n2; +/* N2 is the number of singular values of A greater than SFMIN. */ +/* If N2 +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGESVX computes the solution to system of linear equations A * X = B for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGESVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, */ +/* EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, */ +/* WORK, IWORK, INFO ) */ + +/* CHARACTER EQUED, FACT, TRANS */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS */ +/* DOUBLE PRECISION RCOND */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ BERR( * ), C( * ), FERR( * ), R( * ), */ +/* $ WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGESVX uses the LU factorization to compute the solution to a real */ +/* > system of linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */ +/* > */ +/* > Error bounds on the solution and a condition estimate are also */ +/* > provided. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed: */ +/* > */ +/* > 1. If FACT = 'E', real scaling factors are computed to equilibrate */ +/* > the system: */ +/* > TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B */ +/* > TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */ +/* > TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */ +/* > Whether or not the system will be equilibrated depends on the */ +/* > scaling of the matrix A, but if equilibration is used, A is */ +/* > overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */ +/* > or diag(C)*B (if TRANS = 'T' or 'C'). */ +/* > */ +/* > 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the */ +/* > matrix A (after equilibration if FACT = 'E') as */ +/* > A = P * L * U, */ +/* > where P is a permutation matrix, L is a unit lower triangular */ +/* > matrix, and U is upper triangular. */ +/* > */ +/* > 3. If some U(i,i)=0, so that U is exactly singular, then the routine */ +/* > returns with INFO = i. Otherwise, the factored form of A is used */ +/* > to estimate the condition number of the matrix A. If the */ +/* > reciprocal of the condition number is less than machine precision, */ +/* > INFO = N+1 is returned as a warning, but the routine still goes on */ +/* > to solve for X and compute error bounds as described below. */ +/* > */ +/* > 4. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 5. Iterative refinement is applied to improve the computed solution */ +/* > matrix and calculate error bounds and backward error estimates */ +/* > for it. */ +/* > */ +/* > 6. If equilibration was used, the matrix X is premultiplied by */ +/* > diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */ +/* > that it solves the original system before equilibration. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of the matrix A is */ +/* > supplied on entry, and if not, whether the matrix A should be */ +/* > equilibrated before it is factored. */ +/* > = 'F': On entry, AF and IPIV contain the factored form of A. */ +/* > If EQUED is not 'N', the matrix A has been */ +/* > equilibrated with scaling factors given by R and C. */ +/* > A, AF, and IPIV are not modified. */ +/* > = 'N': The matrix A will be copied to AF and factored. */ +/* > = 'E': The matrix A will be equilibrated if necessary, then */ +/* > copied to AF and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is */ +/* > not 'N', then A must have been equilibrated by the scaling */ +/* > factors in R and/or C. A is not modified if FACT = 'F' or */ +/* > 'N', or if FACT = 'E' and EQUED = 'N' on exit. */ +/* > */ +/* > On exit, if EQUED .ne. 'N', A is scaled as follows: */ +/* > EQUED = 'R': A := diag(R) * A */ +/* > EQUED = 'C': A := A * diag(C) */ +/* > EQUED = 'B': A := diag(R) * A * diag(C). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AF */ +/* > \verbatim */ +/* > AF is DOUBLE PRECISION array, dimension (LDAF,N) */ +/* > If FACT = 'F', then AF is an input argument and on entry */ +/* > contains the factors L and U from the factorization */ +/* > A = P*L*U as computed by DGETRF. If EQUED .ne. 'N', then */ +/* > AF is the factored form of the equilibrated matrix A. */ +/* > */ +/* > If FACT = 'N', then AF is an output argument and on exit */ +/* > returns the factors L and U from the factorization A = P*L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then AF is an output argument and on exit */ +/* > returns the factors L and U from the factorization A = P*L*U */ +/* > of the equilibrated matrix A (see the description of A for */ +/* > the form of the equilibrated matrix). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > as computed by DGETRF; row i of the matrix was interchanged */ +/* > with row IPIV(i). */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > of the equilibrated matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] EQUED */ +/* > \verbatim */ +/* > EQUED is CHARACTER*1 */ +/* > Specifies the form of equilibration that was done. */ +/* > = 'N': No equilibration (always true if FACT = 'N'). */ +/* > = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* > diag(R). */ +/* > = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* > by diag(C). */ +/* > = 'B': Both row and column equilibration, i.e., A has been */ +/* > replaced by diag(R) * A * diag(C). */ +/* > EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* > output argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] R */ +/* > \verbatim */ +/* > R is DOUBLE PRECISION array, dimension (N) */ +/* > The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* > multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* > is not accessed. R is an input argument if FACT = 'F'; */ +/* > otherwise, R is an output argument. If FACT = 'F' and */ +/* > EQUED = 'R' or 'B', each element of R must be positive. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (N) */ +/* > The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* > multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* > is not accessed. C is an input argument if FACT = 'F'; */ +/* > otherwise, C is an output argument. If FACT = 'F' and */ +/* > EQUED = 'C' or 'B', each element of C must be positive. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, */ +/* > if EQUED = 'N', B is not modified; */ +/* > if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ +/* > diag(R)*B; */ +/* > if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ +/* > overwritten by diag(C)*B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* > If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X */ +/* > to the original system of equations. Note that A and B are */ +/* > modified on exit if EQUED .ne. 'N', and the solution to the */ +/* > equilibrated system is inv(diag(C))*X if TRANS = 'N' and */ +/* > EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' */ +/* > and EQUED = 'R' or 'B'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > The estimate of the reciprocal condition number of the matrix */ +/* > A after equilibration (if done). If RCOND is less than the */ +/* > machine precision (in particular, if RCOND = 0), the matrix */ +/* > is singular to working precision. This condition is */ +/* > indicated by a return code of INFO > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (4*N) */ +/* > On exit, WORK(1) contains the reciprocal pivot growth */ +/* > factor norm(A)/norm(U). The "f2cmax absolute element" norm is */ +/* > used. If WORK(1) is much less than 1, then the stability */ +/* > of the LU factorization of the (equilibrated) matrix A */ +/* > could be poor. This also means that the solution X, condition */ +/* > estimator RCOND, and forward error bound FERR could be */ +/* > unreliable. If factorization fails with 0 WORK(1) contains the reciprocal pivot growth factor for the */ +/* > leading INFO columns of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= N: U(i,i) is exactly zero. The factorization has */ +/* > been completed, but the factor U is exactly */ +/* > singular, so the solution and error bounds */ +/* > could not be computed. RCOND = 0 is returned. */ +/* > = N+1: U is nonsingular, but RCOND is less than machine */ +/* > precision, meaning that the matrix is singular */ +/* > to working precision. Nevertheless, the */ +/* > solution and error bounds are computed because */ +/* > there are a number of situations where the */ +/* > computed solution can be more accurate than the */ +/* > value of RCOND would suggest. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup doubleGEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int dgesvx_(char *fact, char *trans, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, char *equed, doublereal *r__, doublereal *c__, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + doublereal amax; + char norm[1]; + integer i__, j; + extern logical lsame_(char *, char *); + doublereal rcmin, rcmax, anorm; + logical equil; + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dlaqge_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, char *), dgecon_(char *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, integer *); + doublereal colcnd; + logical nofact; + extern /* Subroutine */ int dgeequ_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dgerfs_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, integer *), + dgetrf_(integer *, integer *, doublereal *, integer *, integer *, + integer *), dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *), xerbla_(char *, + integer *, ftnlen); + doublereal bignum; + extern doublereal dlantr_(char *, char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *); + integer infequ; + logical colequ; + extern /* Subroutine */ int dgetrs_(char *, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + doublereal rowcnd; + logical notran; + doublereal smlnum; + logical rowequ; + doublereal rpvgrw; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + equil = lsame_(fact, "E"); + notran = lsame_(trans, "N"); + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE_; + colequ = FALSE_; + } else { + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + smlnum = dlamch_("Safe minimum"); + bignum = 1. / smlnum; + } + +/* Test the input parameters. */ + + if (! nofact && ! equil && ! lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -8; + } else if (lsame_(fact, "F") && ! (rowequ || colequ + || lsame_(equed, "N"))) { + *info = -10; + } else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[j]; + rcmax = f2cmax(d__1,d__2); +/* L10: */ + } + if (rcmin <= 0.) { + *info = -11; + } else if (*n > 0) { + rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + rowcnd = 1.; + } + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = f2cmax(d__1,d__2); +/* L20: */ + } + if (rcmin <= 0.) { + *info = -12; + } else if (*n > 0) { + colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + colcnd = 1.; + } + } + if (*info == 0) { + if (*ldb < f2cmax(1,*n)) { + *info = -14; + } else if (*ldx < f2cmax(1,*n)) { + *info = -16; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGESVX", &i__1, (ftnlen)6); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + dgeequ_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &colcnd, & + amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + dlaqge_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, & + colcnd, &amax, equed); + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + } + } + +/* Scale the right hand side. */ + + if (notran) { + if (rowequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = r__[i__] * b[i__ + j * b_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else if (colequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = c__[i__] * b[i__ + j * b_dim1]; +/* L50: */ + } +/* L60: */ + } + } + + if (nofact || equil) { + +/* Compute the LU factorization of A. */ + + dlacpy_("Full", n, n, &a[a_offset], lda, &af[af_offset], ldaf); + dgetrf_(n, n, &af[af_offset], ldaf, &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. */ + + rpvgrw = dlantr_("M", "U", "N", info, info, &af[af_offset], ldaf, + &work[1]); + if (rpvgrw == 0.) { + rpvgrw = 1.; + } else { + rpvgrw = dlange_("M", n, info, &a[a_offset], lda, &work[1]) / rpvgrw; + } + work[1] = rpvgrw; + *rcond = 0.; + return 0; + } + } + +/* Compute the norm of the matrix A and the */ +/* reciprocal pivot growth factor RPVGRW. */ + + if (notran) { + *(unsigned char *)norm = '1'; + } else { + *(unsigned char *)norm = 'I'; + } + anorm = dlange_(norm, n, n, &a[a_offset], lda, &work[1]); + rpvgrw = dlantr_("M", "U", "N", n, n, &af[af_offset], ldaf, &work[1]); + if (rpvgrw == 0.) { + rpvgrw = 1.; + } else { + rpvgrw = dlange_("M", n, n, &a[a_offset], lda, &work[1]) / + rpvgrw; + } + +/* Compute the reciprocal of the condition number of A. */ + + dgecon_(norm, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1], + info); + +/* Compute the solution matrix X. */ + + dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + dgetrs_(trans, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + dgerfs_(trans, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &ipiv[1], + &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[ + 1], &iwork[1], info); + +/* Transform the solution matrix X to a solution of the original */ +/* system. */ + + if (notran) { + if (colequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__ + j * x_dim1] = c__[i__] * x[i__ + j * x_dim1]; +/* L70: */ + } +/* L80: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= colcnd; +/* L90: */ + } + } + } else if (rowequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__ + j * x_dim1] = r__[i__] * x[i__ + j * x_dim1]; +/* L100: */ + } +/* L110: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= rowcnd; +/* L120: */ + } + } + + work[1] = rpvgrw; + +/* Set INFO = N+1 if the matrix is singular to working precision. */ + + if (*rcond < dlamch_("Epsilon")) { + *info = *n + 1; + } + return 0; + +/* End of DGESVX */ + +} /* dgesvx_ */ + diff --git a/lapack-netlib/SRC/dgesvxx.c b/lapack-netlib/SRC/dgesvxx.c new file mode 100644 index 000000000..27f99827a --- /dev/null +++ b/lapack-netlib/SRC/dgesvxx.c @@ -0,0 +1,1211 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief DGESVXX computes the solution to system of linear equations A * X = B for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGESVXX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGESVXX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, */ +/* EQUED, R, C, B, LDB, X, LDX, RCOND, RPVGRW, */ +/* BERR, N_ERR_BNDS, ERR_BNDS_NORM, */ +/* ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, */ +/* INFO ) */ + +/* CHARACTER EQUED, FACT, TRANS */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, */ +/* $ N_ERR_BNDS */ +/* DOUBLE PRECISION RCOND, RPVGRW */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ X( LDX , * ),WORK( * ) */ +/* DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), */ +/* $ ERR_BNDS_NORM( NRHS, * ), */ +/* $ ERR_BNDS_COMP( NRHS, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGESVXX uses the LU factorization to compute the solution to a */ +/* > double precision system of linear equations A * X = B, where A is an */ +/* > N-by-N matrix and X and B are N-by-NRHS matrices. */ +/* > */ +/* > If requested, both normwise and maximum componentwise error bounds */ +/* > are returned. DGESVXX will return a solution with a tiny */ +/* > guaranteed error (O(eps) where eps is the working machine */ +/* > precision) unless the matrix is very ill-conditioned, in which */ +/* > case a warning is returned. Relevant condition numbers also are */ +/* > calculated and returned. */ +/* > */ +/* > DGESVXX accepts user-provided factorizations and equilibration */ +/* > factors; see the definitions of the FACT and EQUED options. */ +/* > Solving with refinement and using a factorization from a previous */ +/* > DGESVXX call will also produce a solution with either O(eps) */ +/* > errors or warnings, but we cannot make that claim for general */ +/* > user-provided factorizations and equilibration factors if they */ +/* > differ from what DGESVXX would itself produce. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed: */ +/* > */ +/* > 1. If FACT = 'E', double precision scaling factors are computed to equilibrate */ +/* > the system: */ +/* > */ +/* > TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B */ +/* > TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */ +/* > TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */ +/* > */ +/* > Whether or not the system will be equilibrated depends on the */ +/* > scaling of the matrix A, but if equilibration is used, A is */ +/* > overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */ +/* > or diag(C)*B (if TRANS = 'T' or 'C'). */ +/* > */ +/* > 2. If FACT = 'N' or 'E', the LU decomposition is used to factor */ +/* > the matrix A (after equilibration if FACT = 'E') as */ +/* > */ +/* > A = P * L * U, */ +/* > */ +/* > where P is a permutation matrix, L is a unit lower triangular */ +/* > matrix, and U is upper triangular. */ +/* > */ +/* > 3. If some U(i,i)=0, so that U is exactly singular, then the */ +/* > routine returns with INFO = i. Otherwise, the factored form of A */ +/* > is used to estimate the condition number of the matrix A (see */ +/* > argument RCOND). If the reciprocal of the condition number is less */ +/* > than machine precision, the routine still goes on to solve for X */ +/* > and compute error bounds as described below. */ +/* > */ +/* > 4. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */ +/* > the routine will use iterative refinement to try to get a small */ +/* > error and error bounds. Refinement calculates the residual to at */ +/* > least twice the working precision. */ +/* > */ +/* > 6. If equilibration was used, the matrix X is premultiplied by */ +/* > diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */ +/* > that it solves the original system before equilibration. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \verbatim */ +/* > Some optional parameters are bundled in the PARAMS array. These */ +/* > settings determine how refinement is performed, but often the */ +/* > defaults are acceptable. If the defaults are acceptable, users */ +/* > can pass NPARAMS = 0 which prevents the source code from accessing */ +/* > the PARAMS argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of the matrix A is */ +/* > supplied on entry, and if not, whether the matrix A should be */ +/* > equilibrated before it is factored. */ +/* > = 'F': On entry, AF and IPIV contain the factored form of A. */ +/* > If EQUED is not 'N', the matrix A has been */ +/* > equilibrated with scaling factors given by R and C. */ +/* > A, AF, and IPIV are not modified. */ +/* > = 'N': The matrix A will be copied to AF and factored. */ +/* > = 'E': The matrix A will be equilibrated if necessary, then */ +/* > copied to AF and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate Transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is */ +/* > not 'N', then A must have been equilibrated by the scaling */ +/* > factors in R and/or C. A is not modified if FACT = 'F' or */ +/* > 'N', or if FACT = 'E' and EQUED = 'N' on exit. */ +/* > */ +/* > On exit, if EQUED .ne. 'N', A is scaled as follows: */ +/* > EQUED = 'R': A := diag(R) * A */ +/* > EQUED = 'C': A := A * diag(C) */ +/* > EQUED = 'B': A := diag(R) * A * diag(C). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AF */ +/* > \verbatim */ +/* > AF is DOUBLE PRECISION array, dimension (LDAF,N) */ +/* > If FACT = 'F', then AF is an input argument and on entry */ +/* > contains the factors L and U from the factorization */ +/* > A = P*L*U as computed by DGETRF. If EQUED .ne. 'N', then */ +/* > AF is the factored form of the equilibrated matrix A. */ +/* > */ +/* > If FACT = 'N', then AF is an output argument and on exit */ +/* > returns the factors L and U from the factorization A = P*L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then AF is an output argument and on exit */ +/* > returns the factors L and U from the factorization A = P*L*U */ +/* > of the equilibrated matrix A (see the description of A for */ +/* > the form of the equilibrated matrix). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > as computed by DGETRF; row i of the matrix was interchanged */ +/* > with row IPIV(i). */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > of the equilibrated matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] EQUED */ +/* > \verbatim */ +/* > EQUED is CHARACTER*1 */ +/* > Specifies the form of equilibration that was done. */ +/* > = 'N': No equilibration (always true if FACT = 'N'). */ +/* > = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* > diag(R). */ +/* > = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* > by diag(C). */ +/* > = 'B': Both row and column equilibration, i.e., A has been */ +/* > replaced by diag(R) * A * diag(C). */ +/* > EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* > output argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] R */ +/* > \verbatim */ +/* > R is DOUBLE PRECISION array, dimension (N) */ +/* > The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* > multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* > is not accessed. R is an input argument if FACT = 'F'; */ +/* > otherwise, R is an output argument. If FACT = 'F' and */ +/* > EQUED = 'R' or 'B', each element of R must be positive. */ +/* > If R is output, each element of R is a power of the radix. */ +/* > If R is input, each element of R should be a power of the radix */ +/* > to ensure a reliable solution and error estimates. Scaling by */ +/* > powers of the radix does not cause rounding errors unless the */ +/* > result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (N) */ +/* > The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* > multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* > is not accessed. C is an input argument if FACT = 'F'; */ +/* > otherwise, C is an output argument. If FACT = 'F' and */ +/* > EQUED = 'C' or 'B', each element of C must be positive. */ +/* > If C is output, each element of C is a power of the radix. */ +/* > If C is input, each element of C should be a power of the radix */ +/* > to ensure a reliable solution and error estimates. Scaling by */ +/* > powers of the radix does not cause rounding errors unless the */ +/* > result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, */ +/* > if EQUED = 'N', B is not modified; */ +/* > if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ +/* > diag(R)*B; */ +/* > if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ +/* > overwritten by diag(C)*B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension (LDX,NRHS) */ +/* > If INFO = 0, the N-by-NRHS solution matrix X to the original */ +/* > system of equations. Note that A and B are modified on exit */ +/* > if EQUED .ne. 'N', and the solution to the equilibrated system is */ +/* > inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or */ +/* > inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > Reciprocal scaled condition number. This is an estimate of the */ +/* > reciprocal Skeel condition number of the matrix A after */ +/* > equilibration (if done). If this is less than the machine */ +/* > precision (in particular, if it is zero), the matrix is singular */ +/* > to working precision. Note that the error may still be small even */ +/* > if this number is very small and the matrix appears ill- */ +/* > conditioned. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RPVGRW */ +/* > \verbatim */ +/* > RPVGRW is DOUBLE PRECISION */ +/* > Reciprocal pivot growth. On exit, this contains the reciprocal */ +/* > pivot growth factor norm(A)/norm(U). The "f2cmax absolute element" */ +/* > norm is used. If this is much less than 1, then the stability of */ +/* > the LU factorization of the (equilibrated) matrix A could be poor. */ +/* > This also means that the solution X, estimated condition numbers, */ +/* > and error bounds could be unreliable. If factorization fails with */ +/* > 0 for the leading INFO columns of A. In DGESVX, this quantity is */ +/* > returned in WORK(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > Componentwise relative backward error. This is the */ +/* > componentwise relative backward error of each solution vector X(j) */ +/* > (i.e., the smallest relative change in any element of A or B that */ +/* > makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N_ERR_BNDS */ +/* > \verbatim */ +/* > N_ERR_BNDS is INTEGER */ +/* > Number of error bounds to return for each right hand side */ +/* > and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* > ERR_BNDS_COMP below. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_NORM */ +/* > \verbatim */ +/* > ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > normwise relative error, which is defined as follows: */ +/* > */ +/* > Normwise relative error in the ith solution vector: */ +/* > max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* > ------------------------------ */ +/* > max_j abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the type of error information as described */ +/* > below. There currently are up to three pieces of information */ +/* > returned. */ +/* > */ +/* > The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated normwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*A, where S scales each row by a power of the */ +/* > radix so all absolute row sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_COMP */ +/* > \verbatim */ +/* > ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > componentwise relative error, which is defined as follows: */ +/* > */ +/* > Componentwise relative error in the ith solution vector: */ +/* > abs(XTRUE(j,i) - X(j,i)) */ +/* > max_j ---------------------- */ +/* > abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the right-hand side i (on which the */ +/* > componentwise relative error depends), and the type of error */ +/* > information as described below. There currently are up to three */ +/* > pieces of information returned for each right-hand side. If */ +/* > componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* > ERR_BNDS_COMP is not accessed. If N_ERR_BNDS < 3, then at most */ +/* > the first (:,N_ERR_BNDS) entries are returned. */ +/* > */ +/* > The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated componentwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*(A*diag(x)), where x is the solution for the */ +/* > current right-hand side and S scales each row of */ +/* > A*diag(x) by a power of the radix so all absolute row */ +/* > sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NPARAMS */ +/* > \verbatim */ +/* > NPARAMS is INTEGER */ +/* > Specifies the number of parameters set in PARAMS. If <= 0, the */ +/* > PARAMS array is never referenced and default values are used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] PARAMS */ +/* > \verbatim */ +/* > PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) */ +/* > Specifies algorithm parameters. If an entry is < 0.0, then */ +/* > that entry will be filled with default value used for that */ +/* > parameter. Only positions up to NPARAMS are accessed; defaults */ +/* > are used for higher-numbered parameters. */ +/* > */ +/* > PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* > refinement or not. */ +/* > Default: 1.0D+0 */ +/* > = 0.0: No refinement is performed, and no error bounds are */ +/* > computed. */ +/* > = 1.0: Use the extra-precise refinement algorithm. */ +/* > (other values are reserved for future use) */ +/* > */ +/* > PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* > computations allowed for refinement. */ +/* > Default: 10 */ +/* > Aggressive: Set to 100 to permit convergence using approximate */ +/* > factorizations or factorizations other than LU. If */ +/* > the factorization uses a technique other than */ +/* > Gaussian elimination, the guarantees in */ +/* > err_bnds_norm and err_bnds_comp may no longer be */ +/* > trustworthy. */ +/* > */ +/* > PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* > will attempt to find a solution with small componentwise */ +/* > relative error in the double-precision algorithm. Positive */ +/* > is true, 0.0 is false. */ +/* > Default: 1.0 (attempt componentwise convergence) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (4*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: Successful exit. The solution to every right-hand side is */ +/* > guaranteed. */ +/* > < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly singular, so */ +/* > the solution and error bounds could not be computed. RCOND = 0 */ +/* > is returned. */ +/* > = N+J: The solution corresponding to the Jth right-hand side is */ +/* > not guaranteed. The solutions corresponding to other right- */ +/* > hand sides K with K > J may not be guaranteed as well, but */ +/* > only the first such right-hand side is reported. If a small */ +/* > componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* > the Jth right-hand side is the first with a normwise error */ +/* > bound that is not guaranteed (the smallest J such */ +/* > that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ +/* > the Jth right-hand side is the first with either a normwise or */ +/* > componentwise error bound that is not guaranteed (the smallest */ +/* > J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* > ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* > ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* > about all of the right-hand sides check ERR_BNDS_NORM or */ +/* > ERR_BNDS_COMP. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup doubleGEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int dgesvxx_(char *fact, char *trans, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, + integer *ipiv, char *equed, doublereal *r__, doublereal *c__, + doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * + rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + doublereal amax; + extern doublereal dla_gerpvgrw__(integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + integer j; + extern logical lsame_(char *, char *); + doublereal rcmin, rcmax; + logical equil; + extern doublereal dlamch_(char *); + extern /* Subroutine */ int dlaqge_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, char *); + doublereal colcnd; + logical nofact; + extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen); + doublereal bignum; + integer infequ; + logical colequ; + extern /* Subroutine */ int dgetrs_(char *, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + doublereal rowcnd; + logical notran; + doublereal smlnum; + logical rowequ; + extern /* Subroutine */ int dlascl2_(integer *, integer *, doublereal *, + doublereal *, integer *), dgeequb_(integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *), dgerfsx_(char *, char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1; + err_bnds_norm__ -= err_bnds_norm_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --berr; + --params; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + equil = lsame_(fact, "E"); + notran = lsame_(trans, "N"); + smlnum = dlamch_("Safe minimum"); + bignum = 1. / smlnum; + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE_; + colequ = FALSE_; + } else { + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + } + +/* Default is failure. If an input parameter is wrong or */ +/* factorization fails, make everything look horrible. Only the */ +/* pivot growth is set here, the rest is initialized in DGERFSX. */ + + *rpvgrw = 0.; + +/* Test the input parameters. PARAMS is not tested until DGERFSX. */ + + if (! nofact && ! equil && ! lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -8; + } else if (lsame_(fact, "F") && ! (rowequ || colequ + || lsame_(equed, "N"))) { + *info = -10; + } else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[j]; + rcmax = f2cmax(d__1,d__2); +/* L10: */ + } + if (rcmin <= 0.) { + *info = -11; + } else if (*n > 0) { + rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + rowcnd = 1.; + } + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = f2cmax(d__1,d__2); +/* L20: */ + } + if (rcmin <= 0.) { + *info = -12; + } else if (*n > 0) { + colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + colcnd = 1.; + } + } + if (*info == 0) { + if (*ldb < f2cmax(1,*n)) { + *info = -14; + } else if (*ldx < f2cmax(1,*n)) { + *info = -16; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGESVXX", &i__1, (ftnlen)7); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + dgeequb_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &colcnd, + &amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + dlaqge_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, & + colcnd, &amax, equed); + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + } + +/* If the scaling factors are not applied, set them to 1.0. */ + + if (! rowequ) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + r__[j] = 1.; + } + } + if (! colequ) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 1.; + } + } + } + +/* Scale the right-hand side. */ + + if (notran) { + if (rowequ) { + dlascl2_(n, nrhs, &r__[1], &b[b_offset], ldb); + } + } else { + if (colequ) { + dlascl2_(n, nrhs, &c__[1], &b[b_offset], ldb); + } + } + + if (nofact || equil) { + +/* Compute the LU factorization of A. */ + + dlacpy_("Full", n, n, &a[a_offset], lda, &af[af_offset], ldaf); + dgetrf_(n, n, &af[af_offset], ldaf, &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + +/* Pivot in column INFO is exactly 0 */ +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. */ + + *rpvgrw = dla_gerpvgrw__(n, info, &a[a_offset], lda, &af[ + af_offset], ldaf); + return 0; + } + } + +/* Compute the reciprocal pivot growth factor RPVGRW. */ + + *rpvgrw = dla_gerpvgrw__(n, n, &a[a_offset], lda, &af[af_offset], ldaf); + +/* Compute the solution matrix X. */ + + dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + dgetrs_(trans, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + dgerfsx_(trans, equed, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, & + ipiv[1], &r__[1], &c__[1], &b[b_offset], ldb, &x[x_offset], ldx, + rcond, &berr[1], n_err_bnds__, &err_bnds_norm__[ + err_bnds_norm_offset], &err_bnds_comp__[err_bnds_comp_offset], + nparams, ¶ms[1], &work[1], &iwork[1], info); + +/* Scale solutions. */ + + if (colequ && notran) { + dlascl2_(n, nrhs, &c__[1], &x[x_offset], ldx); + } else if (rowequ && ! notran) { + dlascl2_(n, nrhs, &r__[1], &x[x_offset], ldx); + } + + return 0; + +/* End of DGESVXX */ +} /* dgesvxx_ */ + diff --git a/lapack-netlib/SRC/dgetc2.c b/lapack-netlib/SRC/dgetc2.c new file mode 100644 index 000000000..e6abe6eb8 --- /dev/null +++ b/lapack-netlib/SRC/dgetc2.c @@ -0,0 +1,646 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGETC2 computes the LU factorization with complete pivoting of the general n-by-n matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGETC2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGETC2( N, A, LDA, IPIV, JPIV, INFO ) */ + +/* INTEGER INFO, LDA, N */ +/* INTEGER IPIV( * ), JPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGETC2 computes an LU factorization with complete pivoting of the */ +/* > n-by-n matrix A. The factorization has the form A = P * L * U * Q, */ +/* > where P and Q are permutation matrices, L is lower triangular with */ +/* > unit diagonal elements and U is upper triangular. */ +/* > */ +/* > This is the Level 2 BLAS algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA, N) */ +/* > On entry, the n-by-n matrix A to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U*Q; the unit diagonal elements of L are not stored. */ +/* > If U(k, k) appears to be less than SMIN, U(k, k) is given the */ +/* > value of SMIN, i.e., giving a nonsingular perturbed system. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension(N). */ +/* > The pivot indices; for 1 <= i <= N, row i of the */ +/* > matrix has been interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] JPIV */ +/* > \verbatim */ +/* > JPIV is INTEGER array, dimension(N). */ +/* > The pivot indices; for 1 <= j <= N, column j of the */ +/* > matrix has been interchanged with column JPIV(j). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > > 0: if INFO = k, U(k, k) is likely to produce overflow if */ +/* > we try to solve for x in Ax = b. So U is perturbed to */ +/* > avoid the overflow. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup doubleGEauxiliary */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* > Umea University, S-901 87 Umea, Sweden. */ + +/* ===================================================================== */ +/* Subroutine */ int dgetc2_(integer *n, doublereal *a, integer *lda, integer + *ipiv, integer *jpiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal smin, xmax; + integer i__, j; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *), dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *); + integer ip, jp; + doublereal bignum, smlnum, eps; + integer ipv, jpv; + + +/* -- LAPACK auxiliary routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + --jpiv; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Set constants to control overflow */ + + eps = dlamch_("P"); + smlnum = dlamch_("S") / eps; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Handle the case N=1 by itself */ + + if (*n == 1) { + ipiv[1] = 1; + jpiv[1] = 1; + if ((d__1 = a[a_dim1 + 1], abs(d__1)) < smlnum) { + *info = 1; + a[a_dim1 + 1] = smlnum; + } + return 0; + } + +/* Factorize A using complete pivoting. */ +/* Set pivots less than SMIN to SMIN. */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Find f2cmax element in matrix A */ + + xmax = 0.; + i__2 = *n; + for (ip = i__; ip <= i__2; ++ip) { + i__3 = *n; + for (jp = i__; jp <= i__3; ++jp) { + if ((d__1 = a[ip + jp * a_dim1], abs(d__1)) >= xmax) { + xmax = (d__1 = a[ip + jp * a_dim1], abs(d__1)); + ipv = ip; + jpv = jp; + } +/* L10: */ + } +/* L20: */ + } + if (i__ == 1) { +/* Computing MAX */ + d__1 = eps * xmax; + smin = f2cmax(d__1,smlnum); + } + +/* Swap rows */ + + if (ipv != i__) { + dswap_(n, &a[ipv + a_dim1], lda, &a[i__ + a_dim1], lda); + } + ipiv[i__] = ipv; + +/* Swap columns */ + + if (jpv != i__) { + dswap_(n, &a[jpv * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], & + c__1); + } + jpiv[i__] = jpv; + +/* Check for singularity */ + + if ((d__1 = a[i__ + i__ * a_dim1], abs(d__1)) < smin) { + *info = i__; + a[i__ + i__ * a_dim1] = smin; + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + a[j + i__ * a_dim1] /= a[i__ + i__ * a_dim1]; +/* L30: */ + } + i__2 = *n - i__; + i__3 = *n - i__; + dger_(&i__2, &i__3, &c_b10, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[i__ + + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda); +/* L40: */ + } + + if ((d__1 = a[*n + *n * a_dim1], abs(d__1)) < smin) { + *info = *n; + a[*n + *n * a_dim1] = smin; + } + +/* Set last pivots to N */ + + ipiv[*n] = *n; + jpiv[*n] = *n; + + return 0; + +/* End of DGETC2 */ + +} /* dgetc2_ */ + diff --git a/lapack-netlib/SRC/dgetf2.c b/lapack-netlib/SRC/dgetf2.c new file mode 100644 index 000000000..066562932 --- /dev/null +++ b/lapack-netlib/SRC/dgetf2.c @@ -0,0 +1,620 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row + interchanges (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGETF2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGETF2 computes an LU factorization of a general m-by-n matrix A */ +/* > using partial pivoting with row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = P * L * U */ +/* > where P is a permutation matrix, L is lower triangular with unit */ +/* > diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* > triangular (upper trapezoidal if m < n). */ +/* > */ +/* > This is the right-looking Level 2 BLAS version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the m by n matrix to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (f2cmin(M,N)) */ +/* > The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > > 0: if INFO = k, U(k,k) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgetf2_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + integer i__, j; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sfmin; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern doublereal dlamch_(char *); + integer jp; + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETF2", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Compute machine safe minimum */ + + sfmin = dlamch_("S"); + + i__1 = f2cmin(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + if (a[jp + j * a_dim1] != 0.) { + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. */ + + if (j < *m) { + if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) { + i__2 = *m - j; + d__1 = 1. / a[j + j * a_dim1]; + dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } else { + i__2 = *m - j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[j + i__ + j * a_dim1] /= a[j + j * a_dim1]; +/* L20: */ + } + } + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < f2cmin(*m,*n)) { + +/* Update trailing submatrix. */ + + i__2 = *m - j; + i__3 = *n - j; + dger_(&i__2, &i__3, &c_b8, &a[j + 1 + j * a_dim1], &c__1, &a[j + ( + j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + return 0; + +/* End of DGETF2 */ + +} /* dgetf2_ */ + diff --git a/lapack-netlib/SRC/dgetrf.c b/lapack-netlib/SRC/dgetrf.c new file mode 100644 index 000000000..230573617 --- /dev/null +++ b/lapack-netlib/SRC/dgetrf.c @@ -0,0 +1,645 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGETRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGETRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGETRF computes an LU factorization of a general M-by-N matrix A */ +/* > using partial pivoting with row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = P * L * U */ +/* > where P is a permutation matrix, L is lower triangular with unit */ +/* > diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* > triangular (upper trapezoidal if m < n). */ +/* > */ +/* > This is the right-looking Level 3 BLAS version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (f2cmin(M,N)) */ +/* > The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + integer i__, j; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + integer iinfo; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + integer jb, nb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *), dgetrf2_(integer *, + integer *, doublereal *, integer *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETRF", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= f2cmin(*m,*n)) { + +/* Use unblocked code. */ + + dgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = f2cmin(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = f2cmin(*m,*n) - j + 1; + jb = f2cmin(i__3,nb); + +/* Factor diagonal and subdiagonal blocks and test for exact */ +/* singularity. */ + + i__3 = *m - j + 1; + dgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = f2cmin(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b16, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { + +/* Update trailing submatrix. */ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + dgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &c_b19, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b16, &a[j + jb + (j + jb) * + a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; + +/* End of DGETRF */ + +} /* dgetrf_ */ + diff --git a/lapack-netlib/SRC/dgetrf2.c b/lapack-netlib/SRC/dgetrf2.c new file mode 100644 index 000000000..e991d5d7f --- /dev/null +++ b/lapack-netlib/SRC/dgetrf2.c @@ -0,0 +1,683 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGETRF2 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGETRF2 computes an LU factorization of a general M-by-N matrix A */ +/* > using partial pivoting with row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = P * L * U */ +/* > where P is a permutation matrix, L is lower triangular with unit */ +/* > diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* > triangular (upper trapezoidal if m < n). */ +/* > */ +/* > This is the recursive version of the algorithm. It divides */ +/* > the matrix into four submatrices: */ +/* > */ +/* > [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 */ +/* > A = [ -----|----- ] with n1 = f2cmin(m,n)/2 */ +/* > [ A21 | A22 ] n2 = n-n1 */ +/* > */ +/* > [ A11 ] */ +/* > The subroutine calls itself to factor [ --- ], */ +/* > [ A12 ] */ +/* > [ A12 ] */ +/* > do the swaps on [ --- ], solve A12, update A22, */ +/* > [ A22 ] */ +/* > */ +/* > then calls itself to factor A22 and do the swaps on A21. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (f2cmin(M,N)) */ +/* > The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgetrf2_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + doublereal temp; + integer i__; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemm_(char *, char *, integer *, integer *, integer * + , doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + integer iinfo; + doublereal sfmin; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + integer n1, n2; + extern doublereal dlamch_(char *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dlaswp_( + integer *, doublereal *, integer *, integer *, integer *, integer + *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETRF2", &i__1, (ftnlen)7); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (*m == 1) { + +/* Use unblocked code for one row case */ +/* Just need to handle IPIV and INFO */ + + ipiv[1] = 1; + if (a[a_dim1 + 1] == 0.) { + *info = 1; + } + + } else if (*n == 1) { + +/* Use unblocked code for one column case */ + + +/* Compute machine safe minimum */ + + sfmin = dlamch_("S"); + +/* Find pivot and test for singularity */ + + i__ = idamax_(m, &a[a_dim1 + 1], &c__1); + ipiv[1] = i__; + if (a[i__ + a_dim1] != 0.) { + +/* Apply the interchange */ + + if (i__ != 1) { + temp = a[a_dim1 + 1]; + a[a_dim1 + 1] = a[i__ + a_dim1]; + a[i__ + a_dim1] = temp; + } + +/* Compute elements 2:M of the column */ + + if ((d__1 = a[a_dim1 + 1], abs(d__1)) >= sfmin) { + i__1 = *m - 1; + d__1 = 1. / a[a_dim1 + 1]; + dscal_(&i__1, &d__1, &a[a_dim1 + 2], &c__1); + } else { + i__1 = *m - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + a[i__ + 1 + a_dim1] /= a[a_dim1 + 1]; +/* L10: */ + } + } + + } else { + *info = 1; + } + + } else { + +/* Use recursive code */ + + n1 = f2cmin(*m,*n) / 2; + n2 = *n - n1; + +/* [ A11 ] */ +/* Factor [ --- ] */ +/* [ A21 ] */ + + dgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo); + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + +/* [ A12 ] */ +/* Apply interchanges to [ --- ] */ +/* [ A22 ] */ + + dlaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], & + c__1); + +/* Solve A12 */ + + dtrsm_("L", "L", "N", "U", &n1, &n2, &c_b13, &a[a_offset], lda, &a[( + n1 + 1) * a_dim1 + 1], lda); + +/* Update A22 */ + + i__1 = *m - n1; + dgemm_("N", "N", &i__1, &n2, &n1, &c_b16, &a[n1 + 1 + a_dim1], lda, & + a[(n1 + 1) * a_dim1 + 1], lda, &c_b13, &a[n1 + 1 + (n1 + 1) * + a_dim1], lda); + +/* Factor A22 */ + + i__1 = *m - n1; + dgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + + 1], &iinfo); + +/* Adjust INFO and the pivot indices */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + n1; + } + i__1 = f2cmin(*m,*n); + for (i__ = n1 + 1; i__ <= i__1; ++i__) { + ipiv[i__] += n1; +/* L20: */ + } + +/* Apply interchanges to A21 */ + + i__1 = n1 + 1; + i__2 = f2cmin(*m,*n); + dlaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1); + + } + return 0; + +/* End of DGETRF2 */ + +} /* dgetrf2_ */ + diff --git a/lapack-netlib/SRC/dgetri.c b/lapack-netlib/SRC/dgetri.c new file mode 100644 index 000000000..a8d223285 --- /dev/null +++ b/lapack-netlib/SRC/dgetri.c @@ -0,0 +1,694 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGETRI */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGETRI + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, N */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGETRI computes the inverse of a matrix using the LU factorization */ +/* > computed by DGETRF. */ +/* > */ +/* > This method inverts U and then computes inv(A) by solving the system */ +/* > inv(A)*L = inv(U) for inv(A). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the factors L and U from the factorization */ +/* > A = P*L*U as computed by DGETRF. */ +/* > On exit, if INFO = 0, the inverse of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from DGETRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO=0, then WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N). */ +/* > For optimal performance LWORK >= N*NB, where NB is */ +/* > the optimal blocksize returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero; the matrix is */ +/* > singular and its inverse could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgetri_(integer *n, doublereal *a, integer *lda, integer + *ipiv, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + dgemv_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *); + integer nbmin; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *), dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + integer jb, nb, jj, jp, nn; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork; + extern /* Subroutine */ int dtrtri_(char *, char *, integer *, doublereal + *, integer *, integer *); + integer lwkopt; + logical lquery; + integer iws; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DGETRI", " ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*lda < f2cmax(1,*n)) { + *info = -3; + } else if (*lwork < f2cmax(1,*n) && ! lquery) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETRI", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form inv(U). If INFO > 0 from DTRTRI, then U is singular, */ +/* and the inverse is not computed. */ + + dtrtri_("Upper", "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { +/* Computing MAX */ + i__1 = ldwork * nb; + iws = f2cmax(i__1,1); + if (*lwork < iws) { + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGETRI", " ", n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + } + } else { + iws = *n; + } + +/* Solve the equation inv(A)*L = inv(U) for inv(A). */ + + if (nb < nbmin || nb >= *n) { + +/* Use unblocked code. */ + + for (j = *n; j >= 1; --j) { + +/* Copy current column of L to WORK and replace with zeros. */ + + i__1 = *n; + for (i__ = j + 1; i__ <= i__1; ++i__) { + work[i__] = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } + +/* Compute current column of inv(A). */ + + if (j < *n) { + i__1 = *n - j; + dgemv_("No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1 + + 1], lda, &work[j + 1], &c__1, &c_b22, &a[j * a_dim1 + + 1], &c__1); + } +/* L20: */ + } + } else { + +/* Use blocked code. */ + + nn = (*n - 1) / nb * nb + 1; + i__1 = -nb; + for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *n - j + 1; + jb = f2cmin(i__2,i__3); + +/* Copy current block column of L to WORK and replace with */ +/* zeros. */ + + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = *n; + for (i__ = jj + 1; i__ <= i__3; ++i__) { + work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1]; + a[i__ + jj * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + +/* Compute current block column of inv(A). */ + + if (j + jb <= *n) { + i__2 = *n - j - jb + 1; + dgemm_("No transpose", "No transpose", n, &jb, &i__2, &c_b20, + &a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], & + ldwork, &c_b22, &a[j * a_dim1 + 1], lda); + } + dtrsm_("Right", "Lower", "No transpose", "Unit", n, &jb, &c_b22, & + work[j], &ldwork, &a[j * a_dim1 + 1], lda); +/* L50: */ + } + } + +/* Apply column interchanges. */ + + for (j = *n - 1; j >= 1; --j) { + jp = ipiv[j]; + if (jp != j) { + dswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1); + } +/* L60: */ + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGETRI */ + +} /* dgetri_ */ + diff --git a/lapack-netlib/SRC/dgetrs.c b/lapack-netlib/SRC/dgetrs.c new file mode 100644 index 000000000..c58328d29 --- /dev/null +++ b/lapack-netlib/SRC/dgetrs.c @@ -0,0 +1,620 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGETRS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGETRS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGETRS solves a system of linear equations */ +/* > A * X = B or A**T * X = B */ +/* > with a general N-by-N matrix A using the LU factorization computed */ +/* > by DGETRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T* X = B (Transpose) */ +/* > = 'C': A**T* X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The factors L and U from the factorization A = P*L*U */ +/* > as computed by DGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from DGETRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the right hand side matrix B. */ +/* > On exit, the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs, + doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * + ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), xerbla_( + char *, integer *, ftnlen), dlaswp_(integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *); + logical notran; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETRS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* Solve A * X = B. */ + +/* Apply row interchanges to the right hand sides. */ + + dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + dtrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b12, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b12, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* Solve A**T * X = B. */ + +/* Solve U**T *X = B, overwriting B with X. */ + + dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b12, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve L**T *X = B, overwriting B with X. */ + + dtrsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b12, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Apply row interchanges to the solution vectors. */ + + dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of DGETRS */ + +} /* dgetrs_ */ + diff --git a/lapack-netlib/SRC/dgetsls.c b/lapack-netlib/SRC/dgetsls.c new file mode 100644 index 000000000..db3c76926 --- /dev/null +++ b/lapack-netlib/SRC/dgetsls.c @@ -0,0 +1,931 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DGETSLS */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, */ +/* $ WORK, LWORK, INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGETSLS solves overdetermined or underdetermined real linear systems */ +/* > involving an M-by-N matrix A, using a tall skinny QR or short wide LQ */ +/* > factorization of A. It is assumed that A has full rank. */ +/* > */ +/* > */ +/* > */ +/* > The following options are provided: */ +/* > */ +/* > 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A*X ||. */ +/* > */ +/* > 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* > an underdetermined system A * X = B. */ +/* > */ +/* > 3. If TRANS = 'T' and m >= n: find the minimum norm solution of */ +/* > an undetermined system A**T * X = B. */ +/* > */ +/* > 4. If TRANS = 'T' and m < n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A**T * X ||. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': the linear system involves A; */ +/* > = 'T': the linear system involves A**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of the matrices B and X. NRHS >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > A is overwritten by details of its QR or LQ */ +/* > factorization as returned by DGEQR or DGELQ. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the matrix B of right hand side vectors, stored */ +/* > columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* > if TRANS = 'T'. */ +/* > On exit, if INFO = 0, B is overwritten by the solution */ +/* > vectors, stored columnwise: */ +/* > if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* > squares solution vectors. */ +/* > if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m >= n, rows 1 to M of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m < n, rows 1 to M of B contain the */ +/* > least squares solution vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= MAX(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */ +/* > or optimal, if query was assumed) LWORK. */ +/* > See LWORK for details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If LWORK = -1 or -2, then a workspace query is assumed. */ +/* > If LWORK = -1, the routine calculates optimal size of WORK for the */ +/* > optimal performance and returns this value in WORK(1). */ +/* > If LWORK = -2, the routine calculates minimal size of WORK and */ +/* > returns this value in WORK(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element of the */ +/* > triangular factor of A is zero, so that A does not have */ +/* > full rank; the least squares solution could not be */ +/* > computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup doubleGEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int dgetsls_(char *trans, integer *m, integer *n, integer * + nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + doublereal anrm, bnrm; + logical tran; + integer brow, tszm, tszo, info2, i__, j, iascl, ibscl; + extern /* Subroutine */ int dgelq_(integer *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgeqr_(integer *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *); + integer minmn, maxmn; + doublereal workq[1]; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + doublereal tq[5]; + extern /* Subroutine */ int dgemlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), dlaset_(char *, integer *, integer *, doublereal + *, doublereal *, doublereal *, integer *), xerbla_(char *, + integer *, ftnlen), dgemqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *); + integer scllen; + doublereal bignum, smlnum; + integer wsizem, wsizeo; + logical lquery; + extern /* Subroutine */ int dtrtrs_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *); + integer lw1, lw2, mnk, lwm, lwo; + + +/* -- LAPACK driver routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + minmn = f2cmin(*m,*n); + maxmn = f2cmax(*m,*n); + mnk = f2cmax(minmn,*nrhs); + tran = lsame_(trans, "T"); + + lquery = *lwork == -1 || *lwork == -2; + if (! (lsame_(trans, "N") || lsame_(trans, "T"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -8; + } + } + + if (*info == 0) { + +/* Determine the block size and minimum LWORK */ + + if (*m >= *n) { + dgeqr_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2); + tszo = (integer) tq[0]; + lwo = (integer) workq[0]; + dgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszo, &b[ + b_offset], ldb, workq, &c_n1, &info2); +/* Computing MAX */ + i__1 = lwo, i__2 = (integer) workq[0]; + lwo = f2cmax(i__1,i__2); + dgeqr_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2); + tszm = (integer) tq[0]; + lwm = (integer) workq[0]; + dgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszm, &b[ + b_offset], ldb, workq, &c_n1, &info2); +/* Computing MAX */ + i__1 = lwm, i__2 = (integer) workq[0]; + lwm = f2cmax(i__1,i__2); + wsizeo = tszo + lwo; + wsizem = tszm + lwm; + } else { + dgelq_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2); + tszo = (integer) tq[0]; + lwo = (integer) workq[0]; + dgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszo, &b[ + b_offset], ldb, workq, &c_n1, &info2); +/* Computing MAX */ + i__1 = lwo, i__2 = (integer) workq[0]; + lwo = f2cmax(i__1,i__2); + dgelq_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2); + tszm = (integer) tq[0]; + lwm = (integer) workq[0]; + dgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszm, &b[ + b_offset], ldb, workq, &c_n1, &info2); +/* Computing MAX */ + i__1 = lwm, i__2 = (integer) workq[0]; + lwm = f2cmax(i__1,i__2); + wsizeo = tszo + lwo; + wsizem = tszm + lwm; + } + + if (*lwork < wsizem && ! lquery) { + *info = -10; + } + + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETSLS", &i__1, (ftnlen)7); + work[1] = (doublereal) wsizeo; + return 0; + } + if (lquery) { + if (*lwork == -1) { + work[1] = (real) wsizeo; + } + if (*lwork == -2) { + work[1] = (real) wsizem; + } + return 0; + } + if (*lwork < wsizeo) { + lw1 = tszm; + lw2 = lwm; + } else { + lw1 = tszo; + lw2 = lwo; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*nrhs) == 0) { + i__1 = f2cmax(*m,*n); + dlaset_("FULL", &i__1, nrhs, &c_b23, &c_b23, &b[b_offset], ldb); + return 0; + } + +/* Get machine parameters */ + + smlnum = dlamch_("S") / dlamch_("P"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + dlaset_("F", &maxmn, nrhs, &c_b23, &c_b23, &b[b_offset], ldb); + goto L50; + } + + brow = *m; + if (tran) { + brow = *n; + } + bnrm = dlange_("M", &brow, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + dlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* compute QR factorization of A */ + + dgeqr_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, + info); + if (! tran) { + +/* Least-Squares Problem f2cmin || A * X - B || */ + +/* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */ + + dgemqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], & + lw1, &b[b_offset], ldb, &work[1], &lw2, info); + +/* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + dtrtrs_("U", "N", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], + ldb, info); + if (*info > 0) { + return 0; + } + scllen = *n; + } else { + +/* Overdetermined system of equations A**T * X = B */ + +/* B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */ + + dtrtrs_("U", "T", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], + ldb, info); + + if (*info > 0) { + return 0; + } + +/* B(N+1:M,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + +/* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */ + + dgemqr_("L", "N", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], & + lw1, &b[b_offset], ldb, &work[1], &lw2, info); + + scllen = *m; + + } + + } else { + +/* Compute LQ factorization of A */ + + dgelq_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, + info); + +/* workspace at least M, optimally M*NB. */ + + if (! tran) { + +/* underdetermined system of equations A * X = B */ + +/* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + dtrtrs_("L", "N", "N", m, nrhs, &a[a_offset], lda, &b[b_offset], + ldb, info); + + if (*info > 0) { + return 0; + } + +/* B(M+1:N,1:NRHS) = 0 */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + +/* B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) */ + + dgemlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], & + lw1, &b[b_offset], ldb, &work[1], &lw2, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + + scllen = *n; + + } else { + +/* overdetermined system f2cmin || A**T * X - B || */ + +/* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */ + + dgemlq_("L", "N", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], & + lw1, &b[b_offset], ldb, &work[1], &lw2, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + +/* B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */ + + dtrtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ + + if (iascl == 1) { + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (iascl == 2) { + dlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + if (ibscl == 1) { + dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (ibscl == 2) { + dlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + +L50: + work[1] = (doublereal) (tszo + lwo); + return 0; + +/* End of DGETSLS */ + +} /* dgetsls_ */ +