Browse Source

Update to use safe scaling algorithm from Reference-LAPACK PR 527

tags/v0.3.24
Martin Kroeker GitHub 2 years ago
parent
commit
e08743d977
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 80 additions and 22 deletions
  1. +45
    -17
      interface/rotg.c
  2. +35
    -5
      interface/zrotg.c

+ 45
- 17
interface/rotg.c View File

@@ -1,9 +1,11 @@
#include <math.h>
#include <float.h>
#include "common.h"
#ifdef FUNCTION_PROFILE
#include "functable.h"
#endif


#ifndef CBLAS

void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
@@ -14,17 +16,27 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){

#endif

#ifdef DOUBLE
long double safmin = DBL_MIN;
#else
long double safmin = FLT_MIN;
#endif

#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)

long double da = *DA;
long double db = *DB;
long double c;
long double s;
long double r, roe, z;
long double r, z;
long double sigma, dascal,dbscal;

long double ada = fabsl(da);
long double adb = fabsl(db);
long double scale = ada + adb;
long double maxab = MAX(ada,adb);
long double safmax;
long double scale;


#ifndef CBLAS
PRINT_DEBUG_NAME;
@@ -32,17 +44,25 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
PRINT_DEBUG_CNAME;
#endif

roe = db;
if (ada > adb) roe = da;

if (scale == ZERO) {
if (adb == ZERO) {
*C = ONE;
*S = ZERO;
*DA = ZERO;
*DB = ZERO;
} else if (ada == ZERO) {
*C = ZERO;
*S = ONE;
*DA = *DB;
*DB = ONE;
} else {
r = sqrt(da * da + db * db);
if (roe < 0) r = -r;
safmax = 1./safmin;
scale = MIN(MAX(safmin,maxab), safmax);
if (ada > adb)
sigma = copysign(1.,da);
else
sigma = copysign(1.,db);
dascal = da / scale;
dbscal = db / scale;
r = sigma * (scale * sqrt(dascal * dascal + dbscal * dbscal));
c = da / r;
s = db / r;
z = ONE;
@@ -65,11 +85,22 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
FLOAT db = *DB;
FLOAT c = *C;
FLOAT s = *S;
FLOAT r, roe, z;
FLOAT sigma;
FLOAT r, z;

FLOAT ada = fabs(da);
FLOAT adb = fabs(db);
FLOAT scale = ada + adb;
FLOAT maxab = MAX(ada,adb);
long double safmax ;
FLOAT scale ;

safmax = 1./safmin;
scale = MIN(MAX(safmin,maxab), safmax);

if (ada > adb)
sigma = sign(1.,da);
else
sigma = sign(1.,db);

#ifndef CBLAS
PRINT_DEBUG_NAME;
@@ -77,20 +108,17 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
PRINT_DEBUG_CNAME;
#endif

roe = db;
if (ada > adb) roe = da;

if (scale == ZERO) {
if (adb == ZERO) {
*C = ONE;
*S = ZERO;
*DA = ZERO;
DA = ZERO;
*DB = ZERO;
} else {
FLOAT aa = da / scale;
FLOAT bb = db / scale;

r = scale * sqrt(aa * aa + bb * bb);
if (roe < 0) r = -r;
r = sigma * scale * sqrt(aa * aa + bb * bb);
c = da / r;
s = db / r;
z = ONE;


+ 35
- 5
interface/zrotg.c View File

@@ -1,9 +1,11 @@
#include <math.h>
#include <float.h>
#include "common.h"
#ifdef FUNCTION_PROFILE
#include "functable.h"
#endif


#ifndef CBLAS
void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){

@@ -14,6 +16,12 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
FLOAT *S = (FLOAT*) VS;
#endif /* CBLAS */

#ifdef DOUBLE
long double safmin = DBL_MIN;
#else
long double safmin = FLT_MIN;
#endif

#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)

long double da_r = *(DA + 0);
@@ -23,6 +31,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
long double r;

long double ada = fabsl(da_r) + fabsl(da_i);
long double adb = sqrt(db_r * db_r + db_i * db_i);

PRINT_DEBUG_NAME;

@@ -38,10 +47,24 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
*(DA + 1) = db_i;
} else {
long double alpha_r, alpha_i;
long double safmax = 1./safmin;
long double sigma;
long double maxab = MAX(ada,adb);
long double scale = MIN(MAX(safmin,maxab), safmax);

ada = sqrt(da_r * da_r + da_i * da_i);

r = sqrt(da_r * da_r + da_i * da_i + db_r * db_r + db_i * db_i);
long double aa_r = da_r / scale;
long double aa_i = da_i / scale;
long double bb_r = db_r / scale;
long double bb_i = db_i / scale;

if (ada > adb)
sigma = copysign(1.,da_r);
else
sigma = copysign(1.,db_r);

r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i);


alpha_r = da_r / ada;
alpha_i = da_i / ada;
@@ -60,7 +83,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
FLOAT r;

FLOAT ada = fabs(da_r) + fabs(da_i);
FLOAT adb;
FLOAT ada = fabs(db_r) + fabs(db_i);

PRINT_DEBUG_NAME;

@@ -75,6 +98,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
*(DA + 0) = db_r;
*(DA + 1) = db_i;
} else {
long double safmax = 1./safmin;
FLOAT scale;
FLOAT aa_r, aa_i, bb_r, bb_i;
FLOAT alpha_r, alpha_i;
@@ -108,14 +132,20 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
scale = (bb_i / bb_r);
adb = bb_r * sqrt(ONE + scale * scale);
}
scale = ada + adb;
FLOAT maxab = MAX(ada,adb);
scale = MIN(MAX(safmin,maxab), safmax);

aa_r = da_r / scale;
aa_i = da_i / scale;
bb_r = db_r / scale;
bb_i = db_i / scale;

r = scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i);
if (ada > adb)
sigma = copysign(1.,da_r);
else
sigma = copysign(1.,db_r);

r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i);

alpha_r = da_r / ada;
alpha_i = da_i / ada;


Loading…
Cancel
Save