Use new algorithms for computing Givens rotations (Reference-LAPACK PR631)tags/v0.3.22^2
| @@ -30,7 +30,7 @@ | |||||
| !> The mathematical formulas used for C and S are | !> The mathematical formulas used for C and S are | ||||
| !> | !> | ||||
| !> sgn(x) = { x / |x|, x != 0 | !> sgn(x) = { x / |x|, x != 0 | ||||
| !> { 1, x = 0 | |||||
| !> { 1, x = 0 | |||||
| !> | !> | ||||
| !> R = sgn(F) * sqrt(|F|**2 + |G|**2) | !> R = sgn(F) * sqrt(|F|**2 + |G|**2) | ||||
| !> | !> | ||||
| @@ -38,19 +38,20 @@ | |||||
| !> | !> | ||||
| !> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2) | !> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2) | ||||
| !> | !> | ||||
| !> Special conditions: | |||||
| !> If G=0, then C=1 and S=0. | |||||
| !> If F=0, then C=0 and S is chosen so that R is real. | |||||
| !> | |||||
| !> When F and G are real, the formulas simplify to C = F/R and | !> When F and G are real, the formulas simplify to C = F/R and | ||||
| !> S = G/R, and the returned values of C, S, and R should be | !> S = G/R, and the returned values of C, S, and R should be | ||||
| !> identical to those returned by CLARTG. | |||||
| !> identical to those returned by SLARTG. | |||||
| !> | !> | ||||
| !> The algorithm used to compute these quantities incorporates scaling | !> The algorithm used to compute these quantities incorporates scaling | ||||
| !> to avoid overflow or underflow in computing the square root of the | !> to avoid overflow or underflow in computing the square root of the | ||||
| !> sum of squares. | !> sum of squares. | ||||
| !> | !> | ||||
| !> This is a faster version of the BLAS1 routine CROTG, except for | |||||
| !> the following differences: | |||||
| !> F and G are unchanged on return. | |||||
| !> If G=0, then C=1 and S=0. | |||||
| !> If F=0, then C=0 and S is chosen so that R is real. | |||||
| !> This is the same routine CROTG from BLAS1, except that | |||||
| !> F and G are unchanged on return. | |||||
| !> | !> | ||||
| !> Below, wp=>sp stands for single precision from LA_CONSTANTS module. | !> Below, wp=>sp stands for single precision from LA_CONSTANTS module. | ||||
| !> \endverbatim | !> \endverbatim | ||||
| @@ -91,22 +92,19 @@ | |||||
| ! Authors: | ! Authors: | ||||
| ! ======== | ! ======== | ||||
| ! | ! | ||||
| !> \author Edward Anderson, Lockheed Martin | |||||
| !> \author Weslley Pereira, University of Colorado Denver, USA | |||||
| ! | ! | ||||
| !> \date August 2016 | |||||
| !> \date December 2021 | |||||
| ! | ! | ||||
| !> \ingroup OTHERauxiliary | !> \ingroup OTHERauxiliary | ||||
| ! | ! | ||||
| !> \par Contributors: | |||||
| ! ================== | |||||
| !> | |||||
| !> Weslley Pereira, University of Colorado Denver, USA | |||||
| ! | |||||
| !> \par Further Details: | !> \par Further Details: | ||||
| ! ===================== | ! ===================== | ||||
| !> | !> | ||||
| !> \verbatim | !> \verbatim | ||||
| !> | !> | ||||
| !> Based on the algorithm from | |||||
| !> | |||||
| !> Anderson E. (2017) | !> Anderson E. (2017) | ||||
| !> Algorithm 978: Safe Scaling in the Level 1 BLAS | !> Algorithm 978: Safe Scaling in the Level 1 BLAS | ||||
| !> ACM Trans Math Softw 44:1--28 | !> ACM Trans Math Softw 44:1--28 | ||||
| @@ -117,7 +115,7 @@ | |||||
| subroutine CLARTG( f, g, c, s, r ) | subroutine CLARTG( f, g, c, s, r ) | ||||
| use LA_CONSTANTS, & | use LA_CONSTANTS, & | ||||
| only: wp=>sp, zero=>szero, one=>sone, two=>stwo, czero, & | only: wp=>sp, zero=>szero, one=>sone, two=>stwo, czero, & | ||||
| rtmin=>srtmin, rtmax=>srtmax, safmin=>ssafmin, safmax=>ssafmax | |||||
| safmin=>ssafmin, safmax=>ssafmax | |||||
| ! | ! | ||||
| ! -- LAPACK auxiliary routine -- | ! -- LAPACK auxiliary routine -- | ||||
| ! -- LAPACK is a software package provided by Univ. of Tennessee, -- | ! -- LAPACK is a software package provided by Univ. of Tennessee, -- | ||||
| @@ -129,7 +127,7 @@ subroutine CLARTG( f, g, c, s, r ) | |||||
| complex(wp) f, g, r, s | complex(wp) f, g, r, s | ||||
| ! .. | ! .. | ||||
| ! .. Local Scalars .. | ! .. Local Scalars .. | ||||
| real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w | |||||
| real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmin, rtmax | |||||
| complex(wp) :: fs, gs, t | complex(wp) :: fs, gs, t | ||||
| ! .. | ! .. | ||||
| ! .. Intrinsic Functions .. | ! .. Intrinsic Functions .. | ||||
| @@ -141,6 +139,9 @@ subroutine CLARTG( f, g, c, s, r ) | |||||
| ! .. Statement Function definitions .. | ! .. Statement Function definitions .. | ||||
| ABSSQ( t ) = real( t )**2 + aimag( t )**2 | ABSSQ( t ) = real( t )**2 + aimag( t )**2 | ||||
| ! .. | ! .. | ||||
| ! .. Constants .. | |||||
| rtmin = sqrt( safmin ) | |||||
| ! .. | |||||
| ! .. Executable Statements .. | ! .. Executable Statements .. | ||||
| ! | ! | ||||
| if( g == czero ) then | if( g == czero ) then | ||||
| @@ -149,30 +150,43 @@ subroutine CLARTG( f, g, c, s, r ) | |||||
| r = f | r = f | ||||
| else if( f == czero ) then | else if( f == czero ) then | ||||
| c = zero | c = zero | ||||
| g1 = max( abs(real(g)), abs(aimag(g)) ) | |||||
| if( g1 > rtmin .and. g1 < rtmax ) then | |||||
| if( real(g) == zero ) then | |||||
| r = abs(aimag(g)) | |||||
| s = conjg( g ) / r | |||||
| elseif( aimag(g) == zero ) then | |||||
| r = abs(real(g)) | |||||
| s = conjg( g ) / r | |||||
| else | |||||
| g1 = max( abs(real(g)), abs(aimag(g)) ) | |||||
| rtmax = sqrt( safmax/2 ) | |||||
| if( g1 > rtmin .and. g1 < rtmax ) then | |||||
| ! | ! | ||||
| ! Use unscaled algorithm | ! Use unscaled algorithm | ||||
| ! | ! | ||||
| g2 = ABSSQ( g ) | |||||
| d = sqrt( g2 ) | |||||
| s = conjg( g ) / d | |||||
| r = d | |||||
| else | |||||
| ! The following two lines can be replaced by `d = abs( g )`. | |||||
| ! This algorithm does not use the intrinsic complex abs. | |||||
| g2 = ABSSQ( g ) | |||||
| d = sqrt( g2 ) | |||||
| s = conjg( g ) / d | |||||
| r = d | |||||
| else | |||||
| ! | ! | ||||
| ! Use scaled algorithm | ! Use scaled algorithm | ||||
| ! | ! | ||||
| u = min( safmax, max( safmin, g1 ) ) | |||||
| uu = one / u | |||||
| gs = g*uu | |||||
| g2 = ABSSQ( gs ) | |||||
| d = sqrt( g2 ) | |||||
| s = conjg( gs ) / d | |||||
| r = d*u | |||||
| u = min( safmax, max( safmin, g1 ) ) | |||||
| gs = g / u | |||||
| ! The following two lines can be replaced by `d = abs( gs )`. | |||||
| ! This algorithm does not use the intrinsic complex abs. | |||||
| g2 = ABSSQ( gs ) | |||||
| d = sqrt( g2 ) | |||||
| s = conjg( gs ) / d | |||||
| r = d*u | |||||
| end if | |||||
| end if | end if | ||||
| else | else | ||||
| f1 = max( abs(real(f)), abs(aimag(f)) ) | f1 = max( abs(real(f)), abs(aimag(f)) ) | ||||
| g1 = max( abs(real(g)), abs(aimag(g)) ) | g1 = max( abs(real(g)), abs(aimag(g)) ) | ||||
| rtmax = sqrt( safmax/4 ) | |||||
| if( f1 > rtmin .and. f1 < rtmax .and. & | if( f1 > rtmin .and. f1 < rtmax .and. & | ||||
| g1 > rtmin .and. g1 < rtmax ) then | g1 > rtmin .and. g1 < rtmax ) then | ||||
| ! | ! | ||||
| @@ -181,32 +195,51 @@ subroutine CLARTG( f, g, c, s, r ) | |||||
| f2 = ABSSQ( f ) | f2 = ABSSQ( f ) | ||||
| g2 = ABSSQ( g ) | g2 = ABSSQ( g ) | ||||
| h2 = f2 + g2 | h2 = f2 + g2 | ||||
| if( f2 > rtmin .and. h2 < rtmax ) then | |||||
| d = sqrt( f2*h2 ) | |||||
| ! safmin <= f2 <= h2 <= safmax | |||||
| if( f2 >= h2 * safmin ) then | |||||
| ! safmin <= f2/h2 <= 1, and h2/f2 is finite | |||||
| c = sqrt( f2 / h2 ) | |||||
| r = f / c | |||||
| rtmax = rtmax * 2 | |||||
| if( f2 > rtmin .and. h2 < rtmax ) then | |||||
| ! safmin <= sqrt( f2*h2 ) <= safmax | |||||
| s = conjg( g ) * ( f / sqrt( f2*h2 ) ) | |||||
| else | |||||
| s = conjg( g ) * ( r / h2 ) | |||||
| end if | |||||
| else | else | ||||
| d = sqrt( f2 )*sqrt( h2 ) | |||||
| ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. | |||||
| ! Moreover, | |||||
| ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, | |||||
| ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). | |||||
| ! Also, | |||||
| ! g2 >> f2, which means that h2 = g2. | |||||
| d = sqrt( f2 * h2 ) | |||||
| c = f2 / d | |||||
| if( c >= safmin ) then | |||||
| r = f / c | |||||
| else | |||||
| ! f2 / sqrt(f2 * h2) < safmin, then | |||||
| ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax | |||||
| r = f * ( h2 / d ) | |||||
| end if | |||||
| s = conjg( g ) * ( f / d ) | |||||
| end if | end if | ||||
| p = 1 / d | |||||
| c = f2*p | |||||
| s = conjg( g )*( f*p ) | |||||
| r = f*( h2*p ) | |||||
| else | else | ||||
| ! | ! | ||||
| ! Use scaled algorithm | ! Use scaled algorithm | ||||
| ! | ! | ||||
| u = min( safmax, max( safmin, f1, g1 ) ) | u = min( safmax, max( safmin, f1, g1 ) ) | ||||
| uu = one / u | |||||
| gs = g*uu | |||||
| gs = g / u | |||||
| g2 = ABSSQ( gs ) | g2 = ABSSQ( gs ) | ||||
| if( f1*uu < rtmin ) then | |||||
| if( f1 / u < rtmin ) then | |||||
| ! | ! | ||||
| ! f is not well-scaled when scaled by g1. | ! f is not well-scaled when scaled by g1. | ||||
| ! Use a different scaling for f. | ! Use a different scaling for f. | ||||
| ! | ! | ||||
| v = min( safmax, max( safmin, f1 ) ) | v = min( safmax, max( safmin, f1 ) ) | ||||
| vv = one / v | |||||
| w = v * uu | |||||
| fs = f*vv | |||||
| w = v / u | |||||
| fs = f / v | |||||
| f2 = ABSSQ( fs ) | f2 = ABSSQ( fs ) | ||||
| h2 = f2*w**2 + g2 | h2 = f2*w**2 + g2 | ||||
| else | else | ||||
| @@ -214,19 +247,43 @@ subroutine CLARTG( f, g, c, s, r ) | |||||
| ! Otherwise use the same scaling for f and g. | ! Otherwise use the same scaling for f and g. | ||||
| ! | ! | ||||
| w = one | w = one | ||||
| fs = f*uu | |||||
| fs = f / u | |||||
| f2 = ABSSQ( fs ) | f2 = ABSSQ( fs ) | ||||
| h2 = f2 + g2 | h2 = f2 + g2 | ||||
| end if | end if | ||||
| if( f2 > rtmin .and. h2 < rtmax ) then | |||||
| d = sqrt( f2*h2 ) | |||||
| ! safmin <= f2 <= h2 <= safmax | |||||
| if( f2 >= h2 * safmin ) then | |||||
| ! safmin <= f2/h2 <= 1, and h2/f2 is finite | |||||
| c = sqrt( f2 / h2 ) | |||||
| r = fs / c | |||||
| rtmax = rtmax * 2 | |||||
| if( f2 > rtmin .and. h2 < rtmax ) then | |||||
| ! safmin <= sqrt( f2*h2 ) <= safmax | |||||
| s = conjg( gs ) * ( fs / sqrt( f2*h2 ) ) | |||||
| else | |||||
| s = conjg( gs ) * ( r / h2 ) | |||||
| end if | |||||
| else | else | ||||
| d = sqrt( f2 )*sqrt( h2 ) | |||||
| ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. | |||||
| ! Moreover, | |||||
| ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, | |||||
| ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). | |||||
| ! Also, | |||||
| ! g2 >> f2, which means that h2 = g2. | |||||
| d = sqrt( f2 * h2 ) | |||||
| c = f2 / d | |||||
| if( c >= safmin ) then | |||||
| r = fs / c | |||||
| else | |||||
| ! f2 / sqrt(f2 * h2) < safmin, then | |||||
| ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax | |||||
| r = fs * ( h2 / d ) | |||||
| end if | |||||
| s = conjg( gs ) * ( fs / d ) | |||||
| end if | end if | ||||
| p = 1 / d | |||||
| c = ( f2*p )*w | |||||
| s = conjg( gs )*( fs*p ) | |||||
| r = ( fs*( h2*p ) )*u | |||||
| ! Rescale c and r | |||||
| c = c * w | |||||
| r = r * u | |||||
| end if | end if | ||||
| end if | end if | ||||
| return | return | ||||
| @@ -11,7 +11,7 @@ | |||||
| ! SUBROUTINE DLARTG( F, G, C, S, R ) | ! SUBROUTINE DLARTG( F, G, C, S, R ) | ||||
| ! | ! | ||||
| ! .. Scalar Arguments .. | ! .. Scalar Arguments .. | ||||
| ! REAL(wp) C, F, G, R, S | |||||
| ! REAL(wp) C, F, G, R, S | |||||
| ! .. | ! .. | ||||
| ! | ! | ||||
| !> \par Purpose: | !> \par Purpose: | ||||
| @@ -45,8 +45,6 @@ | |||||
| !> floating point operations (saves work in DBDSQR when | !> floating point operations (saves work in DBDSQR when | ||||
| !> there are zeros on the diagonal). | !> there are zeros on the diagonal). | ||||
| !> | !> | ||||
| !> If F exceeds G in magnitude, C will be positive. | |||||
| !> | |||||
| !> Below, wp=>dp stands for double precision from LA_CONSTANTS module. | !> Below, wp=>dp stands for double precision from LA_CONSTANTS module. | ||||
| !> \endverbatim | !> \endverbatim | ||||
| ! | ! | ||||
| @@ -112,7 +110,7 @@ | |||||
| subroutine DLARTG( f, g, c, s, r ) | subroutine DLARTG( f, g, c, s, r ) | ||||
| use LA_CONSTANTS, & | use LA_CONSTANTS, & | ||||
| only: wp=>dp, zero=>dzero, half=>dhalf, one=>done, & | only: wp=>dp, zero=>dzero, half=>dhalf, one=>done, & | ||||
| rtmin=>drtmin, rtmax=>drtmax, safmin=>dsafmin, safmax=>dsafmax | |||||
| safmin=>dsafmin, safmax=>dsafmax | |||||
| ! | ! | ||||
| ! -- LAPACK auxiliary routine -- | ! -- LAPACK auxiliary routine -- | ||||
| ! -- LAPACK is a software package provided by Univ. of Tennessee, -- | ! -- LAPACK is a software package provided by Univ. of Tennessee, -- | ||||
| @@ -123,11 +121,15 @@ subroutine DLARTG( f, g, c, s, r ) | |||||
| real(wp) :: c, f, g, r, s | real(wp) :: c, f, g, r, s | ||||
| ! .. | ! .. | ||||
| ! .. Local Scalars .. | ! .. Local Scalars .. | ||||
| real(wp) :: d, f1, fs, g1, gs, p, u, uu | |||||
| real(wp) :: d, f1, fs, g1, gs, u, rtmin, rtmax | |||||
| ! .. | ! .. | ||||
| ! .. Intrinsic Functions .. | ! .. Intrinsic Functions .. | ||||
| intrinsic :: abs, sign, sqrt | intrinsic :: abs, sign, sqrt | ||||
| ! .. | ! .. | ||||
| ! .. Constants .. | |||||
| rtmin = sqrt( safmin ) | |||||
| rtmax = sqrt( safmax/2 ) | |||||
| ! .. | |||||
| ! .. Executable Statements .. | ! .. Executable Statements .. | ||||
| ! | ! | ||||
| f1 = abs( f ) | f1 = abs( f ) | ||||
| @@ -143,20 +145,18 @@ subroutine DLARTG( f, g, c, s, r ) | |||||
| else if( f1 > rtmin .and. f1 < rtmax .and. & | else if( f1 > rtmin .and. f1 < rtmax .and. & | ||||
| g1 > rtmin .and. g1 < rtmax ) then | g1 > rtmin .and. g1 < rtmax ) then | ||||
| d = sqrt( f*f + g*g ) | d = sqrt( f*f + g*g ) | ||||
| p = one / d | |||||
| c = f1*p | |||||
| s = g*sign( p, f ) | |||||
| c = f1 / d | |||||
| r = sign( d, f ) | r = sign( d, f ) | ||||
| s = g / r | |||||
| else | else | ||||
| u = min( safmax, max( safmin, f1, g1 ) ) | u = min( safmax, max( safmin, f1, g1 ) ) | ||||
| uu = one / u | |||||
| fs = f*uu | |||||
| gs = g*uu | |||||
| fs = f / u | |||||
| gs = g / u | |||||
| d = sqrt( fs*fs + gs*gs ) | d = sqrt( fs*fs + gs*gs ) | ||||
| p = one / d | |||||
| c = abs( fs )*p | |||||
| s = gs*sign( p, f ) | |||||
| r = sign( d, f )*u | |||||
| c = abs( fs ) / d | |||||
| r = sign( d, f ) | |||||
| s = gs / r | |||||
| r = r*u | |||||
| end if | end if | ||||
| return | return | ||||
| end subroutine | end subroutine | ||||
| @@ -35,7 +35,7 @@ | |||||
| !> square root of the sum of squares. | !> square root of the sum of squares. | ||||
| !> | !> | ||||
| !> This version is discontinuous in R at F = 0 but it returns the same | !> This version is discontinuous in R at F = 0 but it returns the same | ||||
| !> C and S as SLARTG for complex inputs (F,0) and (G,0). | |||||
| !> C and S as CLARTG for complex inputs (F,0) and (G,0). | |||||
| !> | !> | ||||
| !> This is a more accurate version of the BLAS1 routine SROTG, | !> This is a more accurate version of the BLAS1 routine SROTG, | ||||
| !> with the following other differences: | !> with the following other differences: | ||||
| @@ -45,8 +45,6 @@ | |||||
| !> floating point operations (saves work in SBDSQR when | !> floating point operations (saves work in SBDSQR when | ||||
| !> there are zeros on the diagonal). | !> there are zeros on the diagonal). | ||||
| !> | !> | ||||
| !> If F exceeds G in magnitude, C will be positive. | |||||
| !> | |||||
| !> Below, wp=>sp stands for single precision from LA_CONSTANTS module. | !> Below, wp=>sp stands for single precision from LA_CONSTANTS module. | ||||
| !> \endverbatim | !> \endverbatim | ||||
| ! | ! | ||||
| @@ -112,7 +110,7 @@ | |||||
| subroutine SLARTG( f, g, c, s, r ) | subroutine SLARTG( f, g, c, s, r ) | ||||
| use LA_CONSTANTS, & | use LA_CONSTANTS, & | ||||
| only: wp=>sp, zero=>szero, half=>shalf, one=>sone, & | only: wp=>sp, zero=>szero, half=>shalf, one=>sone, & | ||||
| rtmin=>srtmin, rtmax=>srtmax, safmin=>ssafmin, safmax=>ssafmax | |||||
| safmin=>ssafmin, safmax=>ssafmax | |||||
| ! | ! | ||||
| ! -- LAPACK auxiliary routine -- | ! -- LAPACK auxiliary routine -- | ||||
| ! -- LAPACK is a software package provided by Univ. of Tennessee, -- | ! -- LAPACK is a software package provided by Univ. of Tennessee, -- | ||||
| @@ -123,11 +121,15 @@ subroutine SLARTG( f, g, c, s, r ) | |||||
| real(wp) :: c, f, g, r, s | real(wp) :: c, f, g, r, s | ||||
| ! .. | ! .. | ||||
| ! .. Local Scalars .. | ! .. Local Scalars .. | ||||
| real(wp) :: d, f1, fs, g1, gs, p, u, uu | |||||
| real(wp) :: d, f1, fs, g1, gs, u, rtmin, rtmax | |||||
| ! .. | ! .. | ||||
| ! .. Intrinsic Functions .. | ! .. Intrinsic Functions .. | ||||
| intrinsic :: abs, sign, sqrt | intrinsic :: abs, sign, sqrt | ||||
| ! .. | ! .. | ||||
| ! .. Constants .. | |||||
| rtmin = sqrt( safmin ) | |||||
| rtmax = sqrt( safmax/2 ) | |||||
| ! .. | |||||
| ! .. Executable Statements .. | ! .. Executable Statements .. | ||||
| ! | ! | ||||
| f1 = abs( f ) | f1 = abs( f ) | ||||
| @@ -143,20 +145,18 @@ subroutine SLARTG( f, g, c, s, r ) | |||||
| else if( f1 > rtmin .and. f1 < rtmax .and. & | else if( f1 > rtmin .and. f1 < rtmax .and. & | ||||
| g1 > rtmin .and. g1 < rtmax ) then | g1 > rtmin .and. g1 < rtmax ) then | ||||
| d = sqrt( f*f + g*g ) | d = sqrt( f*f + g*g ) | ||||
| p = one / d | |||||
| c = f1*p | |||||
| s = g*sign( p, f ) | |||||
| c = f1 / d | |||||
| r = sign( d, f ) | r = sign( d, f ) | ||||
| s = g / r | |||||
| else | else | ||||
| u = min( safmax, max( safmin, f1, g1 ) ) | u = min( safmax, max( safmin, f1, g1 ) ) | ||||
| uu = one / u | |||||
| fs = f*uu | |||||
| gs = g*uu | |||||
| fs = f / u | |||||
| gs = g / u | |||||
| d = sqrt( fs*fs + gs*gs ) | d = sqrt( fs*fs + gs*gs ) | ||||
| p = one / d | |||||
| c = abs( fs )*p | |||||
| s = gs*sign( p, f ) | |||||
| r = sign( d, f )*u | |||||
| c = abs( fs ) / d | |||||
| r = sign( d, f ) | |||||
| s = gs / r | |||||
| r = r*u | |||||
| end if | end if | ||||
| return | return | ||||
| end subroutine | end subroutine | ||||
| @@ -11,8 +11,8 @@ | |||||
| ! SUBROUTINE ZLARTG( F, G, C, S, R ) | ! SUBROUTINE ZLARTG( F, G, C, S, R ) | ||||
| ! | ! | ||||
| ! .. Scalar Arguments .. | ! .. Scalar Arguments .. | ||||
| ! REAL(wp) C | |||||
| ! COMPLEX(wp) F, G, R, S | |||||
| ! REAL(wp) C | |||||
| ! COMPLEX(wp) F, G, R, S | |||||
| ! .. | ! .. | ||||
| ! | ! | ||||
| !> \par Purpose: | !> \par Purpose: | ||||
| @@ -30,7 +30,7 @@ | |||||
| !> The mathematical formulas used for C and S are | !> The mathematical formulas used for C and S are | ||||
| !> | !> | ||||
| !> sgn(x) = { x / |x|, x != 0 | !> sgn(x) = { x / |x|, x != 0 | ||||
| !> { 1, x = 0 | |||||
| !> { 1, x = 0 | |||||
| !> | !> | ||||
| !> R = sgn(F) * sqrt(|F|**2 + |G|**2) | !> R = sgn(F) * sqrt(|F|**2 + |G|**2) | ||||
| !> | !> | ||||
| @@ -38,6 +38,10 @@ | |||||
| !> | !> | ||||
| !> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2) | !> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2) | ||||
| !> | !> | ||||
| !> Special conditions: | |||||
| !> If G=0, then C=1 and S=0. | |||||
| !> If F=0, then C=0 and S is chosen so that R is real. | |||||
| !> | |||||
| !> When F and G are real, the formulas simplify to C = F/R and | !> When F and G are real, the formulas simplify to C = F/R and | ||||
| !> S = G/R, and the returned values of C, S, and R should be | !> S = G/R, and the returned values of C, S, and R should be | ||||
| !> identical to those returned by DLARTG. | !> identical to those returned by DLARTG. | ||||
| @@ -46,11 +50,8 @@ | |||||
| !> to avoid overflow or underflow in computing the square root of the | !> to avoid overflow or underflow in computing the square root of the | ||||
| !> sum of squares. | !> sum of squares. | ||||
| !> | !> | ||||
| !> This is a faster version of the BLAS1 routine ZROTG, except for | |||||
| !> the following differences: | |||||
| !> F and G are unchanged on return. | |||||
| !> If G=0, then C=1 and S=0. | |||||
| !> If F=0, then C=0 and S is chosen so that R is real. | |||||
| !> This is the same routine ZROTG from BLAS1, except that | |||||
| !> F and G are unchanged on return. | |||||
| !> | !> | ||||
| !> Below, wp=>dp stands for double precision from LA_CONSTANTS module. | !> Below, wp=>dp stands for double precision from LA_CONSTANTS module. | ||||
| !> \endverbatim | !> \endverbatim | ||||
| @@ -91,22 +92,19 @@ | |||||
| ! Authors: | ! Authors: | ||||
| ! ======== | ! ======== | ||||
| ! | ! | ||||
| !> \author Edward Anderson, Lockheed Martin | |||||
| !> \author Weslley Pereira, University of Colorado Denver, USA | |||||
| ! | ! | ||||
| !> \date August 2016 | |||||
| !> \date December 2021 | |||||
| ! | ! | ||||
| !> \ingroup OTHERauxiliary | !> \ingroup OTHERauxiliary | ||||
| ! | ! | ||||
| !> \par Contributors: | |||||
| ! ================== | |||||
| !> | |||||
| !> Weslley Pereira, University of Colorado Denver, USA | |||||
| ! | |||||
| !> \par Further Details: | !> \par Further Details: | ||||
| ! ===================== | ! ===================== | ||||
| !> | !> | ||||
| !> \verbatim | !> \verbatim | ||||
| !> | !> | ||||
| !> Based on the algorithm from | |||||
| !> | |||||
| !> Anderson E. (2017) | !> Anderson E. (2017) | ||||
| !> Algorithm 978: Safe Scaling in the Level 1 BLAS | !> Algorithm 978: Safe Scaling in the Level 1 BLAS | ||||
| !> ACM Trans Math Softw 44:1--28 | !> ACM Trans Math Softw 44:1--28 | ||||
| @@ -117,7 +115,7 @@ | |||||
| subroutine ZLARTG( f, g, c, s, r ) | subroutine ZLARTG( f, g, c, s, r ) | ||||
| use LA_CONSTANTS, & | use LA_CONSTANTS, & | ||||
| only: wp=>dp, zero=>dzero, one=>done, two=>dtwo, czero=>zzero, & | only: wp=>dp, zero=>dzero, one=>done, two=>dtwo, czero=>zzero, & | ||||
| rtmin=>drtmin, rtmax=>drtmax, safmin=>dsafmin, safmax=>dsafmax | |||||
| safmin=>dsafmin, safmax=>dsafmax | |||||
| ! | ! | ||||
| ! -- LAPACK auxiliary routine -- | ! -- LAPACK auxiliary routine -- | ||||
| ! -- LAPACK is a software package provided by Univ. of Tennessee, -- | ! -- LAPACK is a software package provided by Univ. of Tennessee, -- | ||||
| @@ -129,7 +127,7 @@ subroutine ZLARTG( f, g, c, s, r ) | |||||
| complex(wp) f, g, r, s | complex(wp) f, g, r, s | ||||
| ! .. | ! .. | ||||
| ! .. Local Scalars .. | ! .. Local Scalars .. | ||||
| real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w | |||||
| real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmin, rtmax | |||||
| complex(wp) :: fs, gs, t | complex(wp) :: fs, gs, t | ||||
| ! .. | ! .. | ||||
| ! .. Intrinsic Functions .. | ! .. Intrinsic Functions .. | ||||
| @@ -141,6 +139,9 @@ subroutine ZLARTG( f, g, c, s, r ) | |||||
| ! .. Statement Function definitions .. | ! .. Statement Function definitions .. | ||||
| ABSSQ( t ) = real( t )**2 + aimag( t )**2 | ABSSQ( t ) = real( t )**2 + aimag( t )**2 | ||||
| ! .. | ! .. | ||||
| ! .. Constants .. | |||||
| rtmin = sqrt( safmin ) | |||||
| ! .. | |||||
| ! .. Executable Statements .. | ! .. Executable Statements .. | ||||
| ! | ! | ||||
| if( g == czero ) then | if( g == czero ) then | ||||
| @@ -149,30 +150,43 @@ subroutine ZLARTG( f, g, c, s, r ) | |||||
| r = f | r = f | ||||
| else if( f == czero ) then | else if( f == czero ) then | ||||
| c = zero | c = zero | ||||
| g1 = max( abs(real(g)), abs(aimag(g)) ) | |||||
| if( g1 > rtmin .and. g1 < rtmax ) then | |||||
| if( real(g) == zero ) then | |||||
| r = abs(aimag(g)) | |||||
| s = conjg( g ) / r | |||||
| elseif( aimag(g) == zero ) then | |||||
| r = abs(real(g)) | |||||
| s = conjg( g ) / r | |||||
| else | |||||
| g1 = max( abs(real(g)), abs(aimag(g)) ) | |||||
| rtmax = sqrt( safmax/2 ) | |||||
| if( g1 > rtmin .and. g1 < rtmax ) then | |||||
| ! | ! | ||||
| ! Use unscaled algorithm | ! Use unscaled algorithm | ||||
| ! | ! | ||||
| g2 = ABSSQ( g ) | |||||
| d = sqrt( g2 ) | |||||
| s = conjg( g ) / d | |||||
| r = d | |||||
| else | |||||
| ! The following two lines can be replaced by `d = abs( g )`. | |||||
| ! This algorithm does not use the intrinsic complex abs. | |||||
| g2 = ABSSQ( g ) | |||||
| d = sqrt( g2 ) | |||||
| s = conjg( g ) / d | |||||
| r = d | |||||
| else | |||||
| ! | ! | ||||
| ! Use scaled algorithm | ! Use scaled algorithm | ||||
| ! | ! | ||||
| u = min( safmax, max( safmin, g1 ) ) | |||||
| uu = one / u | |||||
| gs = g*uu | |||||
| g2 = ABSSQ( gs ) | |||||
| d = sqrt( g2 ) | |||||
| s = conjg( gs ) / d | |||||
| r = d*u | |||||
| u = min( safmax, max( safmin, g1 ) ) | |||||
| gs = g / u | |||||
| ! The following two lines can be replaced by `d = abs( gs )`. | |||||
| ! This algorithm does not use the intrinsic complex abs. | |||||
| g2 = ABSSQ( gs ) | |||||
| d = sqrt( g2 ) | |||||
| s = conjg( gs ) / d | |||||
| r = d*u | |||||
| end if | |||||
| end if | end if | ||||
| else | else | ||||
| f1 = max( abs(real(f)), abs(aimag(f)) ) | f1 = max( abs(real(f)), abs(aimag(f)) ) | ||||
| g1 = max( abs(real(g)), abs(aimag(g)) ) | g1 = max( abs(real(g)), abs(aimag(g)) ) | ||||
| rtmax = sqrt( safmax/4 ) | |||||
| if( f1 > rtmin .and. f1 < rtmax .and. & | if( f1 > rtmin .and. f1 < rtmax .and. & | ||||
| g1 > rtmin .and. g1 < rtmax ) then | g1 > rtmin .and. g1 < rtmax ) then | ||||
| ! | ! | ||||
| @@ -181,32 +195,51 @@ subroutine ZLARTG( f, g, c, s, r ) | |||||
| f2 = ABSSQ( f ) | f2 = ABSSQ( f ) | ||||
| g2 = ABSSQ( g ) | g2 = ABSSQ( g ) | ||||
| h2 = f2 + g2 | h2 = f2 + g2 | ||||
| if( f2 > rtmin .and. h2 < rtmax ) then | |||||
| d = sqrt( f2*h2 ) | |||||
| ! safmin <= f2 <= h2 <= safmax | |||||
| if( f2 >= h2 * safmin ) then | |||||
| ! safmin <= f2/h2 <= 1, and h2/f2 is finite | |||||
| c = sqrt( f2 / h2 ) | |||||
| r = f / c | |||||
| rtmax = rtmax * 2 | |||||
| if( f2 > rtmin .and. h2 < rtmax ) then | |||||
| ! safmin <= sqrt( f2*h2 ) <= safmax | |||||
| s = conjg( g ) * ( f / sqrt( f2*h2 ) ) | |||||
| else | |||||
| s = conjg( g ) * ( r / h2 ) | |||||
| end if | |||||
| else | else | ||||
| d = sqrt( f2 )*sqrt( h2 ) | |||||
| ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. | |||||
| ! Moreover, | |||||
| ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, | |||||
| ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). | |||||
| ! Also, | |||||
| ! g2 >> f2, which means that h2 = g2. | |||||
| d = sqrt( f2 * h2 ) | |||||
| c = f2 / d | |||||
| if( c >= safmin ) then | |||||
| r = f / c | |||||
| else | |||||
| ! f2 / sqrt(f2 * h2) < safmin, then | |||||
| ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax | |||||
| r = f * ( h2 / d ) | |||||
| end if | |||||
| s = conjg( g ) * ( f / d ) | |||||
| end if | end if | ||||
| p = 1 / d | |||||
| c = f2*p | |||||
| s = conjg( g )*( f*p ) | |||||
| r = f*( h2*p ) | |||||
| else | else | ||||
| ! | ! | ||||
| ! Use scaled algorithm | ! Use scaled algorithm | ||||
| ! | ! | ||||
| u = min( safmax, max( safmin, f1, g1 ) ) | u = min( safmax, max( safmin, f1, g1 ) ) | ||||
| uu = one / u | |||||
| gs = g*uu | |||||
| gs = g / u | |||||
| g2 = ABSSQ( gs ) | g2 = ABSSQ( gs ) | ||||
| if( f1*uu < rtmin ) then | |||||
| if( f1 / u < rtmin ) then | |||||
| ! | ! | ||||
| ! f is not well-scaled when scaled by g1. | ! f is not well-scaled when scaled by g1. | ||||
| ! Use a different scaling for f. | ! Use a different scaling for f. | ||||
| ! | ! | ||||
| v = min( safmax, max( safmin, f1 ) ) | v = min( safmax, max( safmin, f1 ) ) | ||||
| vv = one / v | |||||
| w = v * uu | |||||
| fs = f*vv | |||||
| w = v / u | |||||
| fs = f / v | |||||
| f2 = ABSSQ( fs ) | f2 = ABSSQ( fs ) | ||||
| h2 = f2*w**2 + g2 | h2 = f2*w**2 + g2 | ||||
| else | else | ||||
| @@ -214,19 +247,43 @@ subroutine ZLARTG( f, g, c, s, r ) | |||||
| ! Otherwise use the same scaling for f and g. | ! Otherwise use the same scaling for f and g. | ||||
| ! | ! | ||||
| w = one | w = one | ||||
| fs = f*uu | |||||
| fs = f / u | |||||
| f2 = ABSSQ( fs ) | f2 = ABSSQ( fs ) | ||||
| h2 = f2 + g2 | h2 = f2 + g2 | ||||
| end if | end if | ||||
| if( f2 > rtmin .and. h2 < rtmax ) then | |||||
| d = sqrt( f2*h2 ) | |||||
| ! safmin <= f2 <= h2 <= safmax | |||||
| if( f2 >= h2 * safmin ) then | |||||
| ! safmin <= f2/h2 <= 1, and h2/f2 is finite | |||||
| c = sqrt( f2 / h2 ) | |||||
| r = fs / c | |||||
| rtmax = rtmax * 2 | |||||
| if( f2 > rtmin .and. h2 < rtmax ) then | |||||
| ! safmin <= sqrt( f2*h2 ) <= safmax | |||||
| s = conjg( gs ) * ( fs / sqrt( f2*h2 ) ) | |||||
| else | |||||
| s = conjg( gs ) * ( r / h2 ) | |||||
| end if | |||||
| else | else | ||||
| d = sqrt( f2 )*sqrt( h2 ) | |||||
| ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow. | |||||
| ! Moreover, | |||||
| ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax, | |||||
| ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax). | |||||
| ! Also, | |||||
| ! g2 >> f2, which means that h2 = g2. | |||||
| d = sqrt( f2 * h2 ) | |||||
| c = f2 / d | |||||
| if( c >= safmin ) then | |||||
| r = fs / c | |||||
| else | |||||
| ! f2 / sqrt(f2 * h2) < safmin, then | |||||
| ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax | |||||
| r = fs * ( h2 / d ) | |||||
| end if | |||||
| s = conjg( gs ) * ( fs / d ) | |||||
| end if | end if | ||||
| p = 1 / d | |||||
| c = ( f2*p )*w | |||||
| s = conjg( gs )*( fs*p ) | |||||
| r = ( fs*( h2*p ) )*u | |||||
| ! Rescale c and r | |||||
| c = c * w | |||||
| r = r * u | |||||
| end if | end if | ||||
| end if | end if | ||||
| return | return | ||||