Merge pull request #3821 from martin-frbg/lapack651

Add a BLAS3-based triangular Sylvester equation solver (Reference-LAPACK PR 651)
3 years ago · f16aa1ce7a
--- a/cmake/lapack.cmake
+++ b/cmake/lapack.cmake
@@ -123,7 +123,8 @@ set(SLASRC
   ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
   ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
   sgesvdq.f slaorhr_col_getrfnp.f
   slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f )
   slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f 
   slarmm.f slatrs3.f strsyl3.f)
 set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
   sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
@@ -221,7 +222,8 @@ set(CLASRC
   cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
   chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
   cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f 
   cungtsqr.f cungtsqr_row.f cunhr_col.f )
   cungtsqr.f cungtsqr_row.f cunhr_col.f 
   clatrs3.f ctrsyl3.f )
 set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
   cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
@@ -313,7 +315,8 @@ set(DLASRC
   dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
   dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
   dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
   dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f )
   dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f 
   dlarmm.f dlatrs3.f dtrsyl3.f)
 set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
   dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
@@ -415,7 +418,8 @@ set(ZLASRC
   zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
   zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
   zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f
   zungtsqr.f zungtsqr_row.f zunhr_col.f)
   zungtsqr.f zungtsqr_row.f zunhr_col.f
   zlatrs3.f ztrsyl3.f)
 set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
   zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f
@@ -617,7 +621,8 @@ set(SLASRC
   ssyevd_2stage.c ssyev_2stage.c ssyevx_2stage.c ssyevr_2stage.c
   ssbev_2stage.c ssbevx_2stage.c ssbevd_2stage.c ssygv_2stage.c
   sgesvdq.c slaorhr_col_getrfnp.c
   slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c )
   slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c 
   slarmm.c slatrs3.c strsyl3.c)
 set(SXLASRC sgesvxx.c sgerfsx.c sla_gerfsx_extended.c sla_geamv.c
   sla_gercond.c sla_gerpvgrw.c ssysvxx.c ssyrfsx.c
@@ -714,7 +719,8 @@ set(CLASRC
   cheevd_2stage.c cheev_2stage.c cheevx_2stage.c cheevr_2stage.c
   chbev_2stage.c chbevx_2stage.c chbevd_2stage.c chegv_2stage.c
   cgesvdq.c claunhr_col_getrfnp.c claunhr_col_getrfnp2.c 
   cungtsqr.c cungtsqr_row.c cunhr_col.c )
   cungtsqr.c cungtsqr_row.c cunhr_col.c 
   clatrs3.c ctrsyl3.c)
 set(CXLASRC cgesvxx.c cgerfsx.c cla_gerfsx_extended.c cla_geamv.c
   cla_gercond_c.c cla_gercond_x.c cla_gerpvgrw.c
@@ -805,7 +811,8 @@ set(DLASRC
   dsyevd_2stage.c dsyev_2stage.c dsyevx_2stage.c dsyevr_2stage.c
   dsbev_2stage.c dsbevx_2stage.c dsbevd_2stage.c dsygv_2stage.c
   dcombssq.c dgesvdq.c dlaorhr_col_getrfnp.c
   dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c )
   dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c 
   dlarmm.c dlatrs3.c dtrsyl3.c)
 set(DXLASRC dgesvxx.c dgerfsx.c dla_gerfsx_extended.c dla_geamv.c
   dla_gercond.c dla_gerpvgrw.c dsysvxx.c dsyrfsx.c
@@ -906,7 +913,7 @@ set(ZLASRC
   zheevd_2stage.c zheev_2stage.c zheevx_2stage.c zheevr_2stage.c
   zhbev_2stage.c zhbevx_2stage.c zhbevd_2stage.c zhegv_2stage.c
   zgesvdq.c zlaunhr_col_getrfnp.c zlaunhr_col_getrfnp2.c
   zungtsqr.c zungtsqr_row.c zunhr_col.c)
   zungtsqr.c zungtsqr_row.c zunhr_col.c zlatrs3.c ztrsyl3.c)
 set(ZXLASRC zgesvxx.c zgerfsx.c zla_gerfsx_extended.c zla_geamv.c
   zla_gercond_c.c zla_gercond_x.c zla_gerpvgrw.c zsysvxx.c zsyrfsx.c
--- a/lapack-netlib/LAPACKE/include/lapack.h
+++ b/lapack-netlib/LAPACKE/include/lapack.h
@@ -12,6 +12,7 @@
 #include <stdlib.h>
 #include <stdarg.h>
 #include <inttypes.h>
 /* It seems all current Fortran compilers put strlen at end.
 *  Some historical compilers put strlen after the str argument
@@ -80,11 +81,26 @@ extern "C" {
 /*----------------------------------------------------------------------------*/
 #ifndef lapack_int
 #define lapack_int     int
 #if defined(LAPACK_ILP64)
 #define lapack_int        int64_t
 #else
 #define lapack_int        int32_t
 #endif
 #endif
 /*
 * Integer format string
 */
 #ifndef LAPACK_IFMT
 #if defined(LAPACK_ILP64)
 #define LAPACK_IFMT       PRId64
 #else
 #define LAPACK_IFMT       PRId32
 #endif
 #endif
 #ifndef lapack_logical
 #define lapack_logical lapack_int
 #define lapack_logical    lapack_int
 #endif
 /* f2c, hence clapack and MacOS Accelerate, returns double instead of float
@@ -115,7 +131,7 @@ typedef lapack_logical (*LAPACK_Z_SELECT2)
    ( const lapack_complex_double*, const lapack_complex_double* );
 #define LAPACK_lsame_base LAPACK_GLOBAL(lsame,LSAME)
 lapack_logical LAPACK_lsame_base( const char* ca, const char* cb,
 lapack_logical LAPACK_lsame_base( const char* ca,  const char* cb,
                              lapack_int lca, lapack_int lcb
 #ifdef LAPACK_FORTRAN_STRLEN_END
    , size_t, size_t
@@ -21986,6 +22002,84 @@ void LAPACK_ztrsyl_base(
    #define LAPACK_ztrsyl(...) LAPACK_ztrsyl_base(__VA_ARGS__)
 #endif
 #define LAPACK_ctrsyl3_base LAPACK_GLOBAL(ctrsyl3,CTRSYL3)
 void LAPACK_ctrsyl3_base(
    char const* trana, char const* tranb,
    lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
    lapack_complex_float const* A, lapack_int const* lda,
    lapack_complex_float const* B, lapack_int const* ldb,
    lapack_complex_float* C, lapack_int const* ldc, float* scale,
    float* swork, lapack_int const *ldswork,
    lapack_int* info
 #ifdef LAPACK_FORTRAN_STRLEN_END
    , size_t, size_t
 #endif
 );
 #ifdef LAPACK_FORTRAN_STRLEN_END
    #define LAPACK_ctrsyl3(...) LAPACK_ctrsyl3_base(__VA_ARGS__, 1, 1)
 #else
    #define LAPACK_ctrsyl3(...) LAPACK_ctrsyl3_base(__VA_ARGS__)
 #endif
 #define LAPACK_dtrsyl3_base LAPACK_GLOBAL(dtrsyl3,DTRSYL3)
 void LAPACK_dtrsyl3_base(
    char const* trana, char const* tranb,
    lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
    double const* A, lapack_int const* lda,
    double const* B, lapack_int const* ldb,
    double* C, lapack_int const* ldc, double* scale,
    lapack_int* iwork, lapack_int const* liwork,
    double* swork, lapack_int const *ldswork,
    lapack_int* info
 #ifdef LAPACK_FORTRAN_STRLEN_END
    , size_t, size_t
 #endif
 );
 #ifdef LAPACK_FORTRAN_STRLEN_END
    #define LAPACK_dtrsyl3(...) LAPACK_dtrsyl3_base(__VA_ARGS__, 1, 1)
 #else
    #define LAPACK_dtrsyl3(...) LAPACK_dtrsyl3_base(__VA_ARGS__)
 #endif
 #define LAPACK_strsyl3_base LAPACK_GLOBAL(strsyl3,STRSYL3)
 void LAPACK_strsyl3_base(
    char const* trana, char const* tranb,
    lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
    float const* A, lapack_int const* lda,
    float const* B, lapack_int const* ldb,
    float* C, lapack_int const* ldc, float* scale,
    lapack_int* iwork, lapack_int const* liwork,
    float* swork, lapack_int const *ldswork,
    lapack_int* info
 #ifdef LAPACK_FORTRAN_STRLEN_END
    , size_t, size_t
 #endif
 );
 #ifdef LAPACK_FORTRAN_STRLEN_END
    #define LAPACK_strsyl3(...) LAPACK_strsyl3_base(__VA_ARGS__, 1, 1)
 #else
    #define LAPACK_strsyl3(...) LAPACK_strsyl3_base(__VA_ARGS__)
 #endif
 #define LAPACK_ztrsyl3_base LAPACK_GLOBAL(ztrsyl3,ZTRSYL3)
 void LAPACK_ztrsyl3_base(
    char const* trana, char const* tranb,
    lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
    lapack_complex_double const* A, lapack_int const* lda,
    lapack_complex_double const* B, lapack_int const* ldb,
    lapack_complex_double* C, lapack_int const* ldc, double* scale,
    double* swork, lapack_int const *ldswork,
    lapack_int* info
 #ifdef LAPACK_FORTRAN_STRLEN_END
    , size_t, size_t
 #endif
 );
 #ifdef LAPACK_FORTRAN_STRLEN_END
    #define LAPACK_ztrsyl3(...) LAPACK_ztrsyl3_base(__VA_ARGS__, 1, 1)
 #else
    #define LAPACK_ztrsyl3(...) LAPACK_ztrsyl3_base(__VA_ARGS__)
 #endif
 #define LAPACK_ctrtri_base LAPACK_GLOBAL(ctrtri,CTRTRI)
 void LAPACK_ctrtri_base(
    char const* uplo, char const* diag,
--- a/lapack-netlib/LAPACKE/include/lapacke.h
+++ b/lapack-netlib/LAPACKE/include/lapacke.h
@@ -2313,6 +2313,19 @@ lapack_int LAPACKE_zlagge( int matrix_layout, lapack_int m, lapack_int n,
 float LAPACKE_slamch( char cmach );
 double LAPACKE_dlamch( char cmach );
 float LAPACKE_slangb( int matrix_layout, char norm, lapack_int n,
                      lapack_int kl, lapack_int ku, const float* ab,
                      lapack_int ldab );
 double LAPACKE_dlangb( int matrix_layout, char norm, lapack_int n,
                       lapack_int kl, lapack_int ku, const double* ab,
                       lapack_int ldab );
 float LAPACKE_clangb( int matrix_layout, char norm, lapack_int n,
                      lapack_int kl, lapack_int ku,
                      const lapack_complex_float* ab, lapack_int ldab );
 double LAPACKE_zlangb( int matrix_layout, char norm, lapack_int n,
                       lapack_int kl, lapack_int ku,
                       const lapack_complex_double* ab, lapack_int ldab );
 float LAPACKE_slange( int matrix_layout, char norm, lapack_int m,
                           lapack_int n, const float* a, lapack_int lda );
 double LAPACKE_dlange( int matrix_layout, char norm, lapack_int m,
@@ -4477,6 +4490,23 @@ lapack_int LAPACKE_ztrsyl( int matrix_layout, char trana, char tranb,
                           lapack_complex_double* c, lapack_int ldc,
                           double* scale );
 lapack_int LAPACKE_strsyl3( int matrix_layout, char trana, char tranb,
                            lapack_int isgn, lapack_int m, lapack_int n,
                            const float* a, lapack_int lda, const float* b,
                            lapack_int ldb, float* c, lapack_int ldc,
                            float* scale );
 lapack_int LAPACKE_dtrsyl3( int matrix_layout, char trana, char tranb,
                            lapack_int isgn, lapack_int m, lapack_int n,
                            const double* a, lapack_int lda, const double* b,
                            lapack_int ldb, double* c, lapack_int ldc,
                            double* scale );
 lapack_int LAPACKE_ztrsyl3( int matrix_layout, char trana, char tranb,
                            lapack_int isgn, lapack_int m, lapack_int n,
                            const lapack_complex_double* a, lapack_int lda,
                            const lapack_complex_double* b, lapack_int ldb,
                            lapack_complex_double* c, lapack_int ldc,
                            double* scale );
 lapack_int LAPACKE_strtri( int matrix_layout, char uplo, char diag, lapack_int n,
                           float* a, lapack_int lda );
 lapack_int LAPACKE_dtrtri( int matrix_layout, char uplo, char diag, lapack_int n,
@@ -7576,6 +7606,21 @@ double LAPACKE_dlapy3_work( double x, double y, double z );
 float LAPACKE_slamch_work( char cmach );
 double LAPACKE_dlamch_work( char cmach );
 float LAPACKE_slangb_work( int matrix_layout, char norm, lapack_int n,
                           lapack_int kl, lapack_int ku, const float* ab,
                           lapack_int ldab, float* work );
 double LAPACKE_dlangb_work( int matrix_layout, char norm, lapack_int n,
                            lapack_int kl, lapack_int ku, const double* ab,
                            lapack_int ldab, double* work );
 float LAPACKE_clangb_work( int matrix_layout, char norm, lapack_int n,
                           lapack_int kl, lapack_int ku,
                           const lapack_complex_float* ab, lapack_int ldab,
                           float* work );
 double LAPACKE_zlangb_work( int matrix_layout, char norm, lapack_int n,
                            lapack_int kl, lapack_int ku,
                            const lapack_complex_double* ab, lapack_int ldab,
                            double* work );
 float LAPACKE_slange_work( int matrix_layout, char norm, lapack_int m,
                                lapack_int n, const float* a, lapack_int lda,
                                float* work );
@@ -10174,6 +10219,35 @@ lapack_int LAPACKE_ztrsyl_work( int matrix_layout, char trana, char tranb,
                                lapack_complex_double* c, lapack_int ldc,
                                double* scale );
 lapack_int LAPACKE_strsyl3_work( int matrix_layout, char trana, char tranb,
                                 lapack_int isgn, lapack_int m, lapack_int n,
                                 const float* a, lapack_int lda,
                                 const float* b, lapack_int ldb,
                                 float* c, lapack_int ldc, float* scale,
                                 lapack_int* iwork, lapack_int liwork,
                                 float* swork, lapack_int ldswork );
 lapack_int LAPACKE_dtrsyl3_work( int matrix_layout, char trana, char tranb,
                                 lapack_int isgn, lapack_int m, lapack_int n,
                                 const double* a, lapack_int lda,
                                 const double* b, lapack_int ldb,
                                 double* c, lapack_int ldc, double* scale,
                                 lapack_int* iwork, lapack_int liwork,
                                 double* swork, lapack_int ldswork );
 lapack_int LAPACKE_ctrsyl3_work( int matrix_layout, char trana, char tranb,
                                 lapack_int isgn, lapack_int m, lapack_int n,
                                 const lapack_complex_float* a, lapack_int lda,
                                 const lapack_complex_float* b, lapack_int ldb,
                                 lapack_complex_float* c, lapack_int ldc,
                                 float* scale, float* swork,
                                 lapack_int ldswork );
 lapack_int LAPACKE_ztrsyl3_work( int matrix_layout, char trana, char tranb,
                                 lapack_int isgn, lapack_int m, lapack_int n,
                                 const lapack_complex_double* a, lapack_int lda,
                                 const lapack_complex_double* b, lapack_int ldb,
                                 lapack_complex_double* c, lapack_int ldc,
                                 double* scale, double* swork,
                                 lapack_int ldswork );
 lapack_int LAPACKE_strtri_work( int matrix_layout, char uplo, char diag,
                                lapack_int n, float* a, lapack_int lda );
 lapack_int LAPACKE_dtrtri_work( int matrix_layout, char uplo, char diag,
--- a/lapack-netlib/LAPACKE/src/lapacke_cgesvdq.c
+++ b/lapack-netlib/LAPACKE/src/lapacke_cgesvdq.c
@@ -48,7 +48,6 @@ lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp,
    lapack_int lrwork = -1;
    float* rwork = NULL;
    float rwork_query;
    lapack_int i;
    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
        LAPACKE_xerbla( "LAPACKE_cgesvdq", -1 );
        return -1;
--- a/lapack-netlib/LAPACKE/src/lapacke_ctrsyl3.c
+++ b/lapack-netlib/LAPACKE/src/lapacke_ctrsyl3.c
@@ -0,0 +1,56 @@
 #include "lapacke_utils.h"
 lapack_int LAPACKE_ctrsyl3( int matrix_layout, char trana, char tranb,
                            lapack_int isgn, lapack_int m, lapack_int n,
                            const lapack_complex_float* a, lapack_int lda,
                            const lapack_complex_float* b, lapack_int ldb,
                            lapack_complex_float* c, lapack_int ldc,
                            float* scale )
 {
    lapack_int info = 0;
    float swork_query[2];
    float* swork = NULL;
    lapack_int ldswork = -1;
    lapack_int swork_size = -1;
    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
        LAPACKE_xerbla( "LAPACKE_ctrsyl3", -1 );
        return -1;
    }
 #ifndef LAPACK_DISABLE_NAN_CHECK
    if( LAPACKE_get_nancheck() ) {
        /* Optionally check input matrices for NaNs */
        if( LAPACKE_cge_nancheck( matrix_layout, m, m, a, lda ) ) {
            return -7;
        }
        if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) {
            return -9;
        }
        if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) {
            return -11;
        }
    }
 #endif
    /* Query optimal working array sizes */
    info = LAPACKE_ctrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda,
                                 b, ldb, c, ldc, scale, swork_query, ldswork );
    if( info != 0 ) {
        goto exit_level_0;
    }
    ldswork = swork_query[0];
    swork_size = ldswork * swork_query[1];
    swork = (float*)LAPACKE_malloc( sizeof(float) * swork_size);
    if( swork == NULL ) {
        info = LAPACK_WORK_MEMORY_ERROR;
        goto exit_level_0;
    }
    /* Call middle-level interface */
    info = LAPACKE_ctrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a,
                                 lda, b, ldb, c, ldc, scale, swork, ldswork );
    /* Release memory and exit */
    LAPACKE_free( swork );
 exit_level_0:
    if( info == LAPACK_WORK_MEMORY_ERROR ) {
        LAPACKE_xerbla( "LAPACKE_ctrsyl3", info );
    }
    return info;
 }
--- a/lapack-netlib/LAPACKE/src/lapacke_ctrsyl3_work.c
+++ b/lapack-netlib/LAPACKE/src/lapacke_ctrsyl3_work.c
@@ -0,0 +1,88 @@
 #include "lapacke_utils.h"
 lapack_int LAPACKE_ctrsyl3_work( int matrix_layout, char trana, char tranb,
                                 lapack_int isgn, lapack_int m, lapack_int n,
                                 const lapack_complex_float* a, lapack_int lda,
                                 const lapack_complex_float* b, lapack_int ldb,
                                 lapack_complex_float* c, lapack_int ldc,
                                 float* scale, float* swork,
                                 lapack_int ldswork )
 {
    lapack_int info = 0;
    if( matrix_layout == LAPACK_COL_MAJOR ) {
        /* Call LAPACK function and adjust info */
        LAPACK_ctrsyl3( &trana, &tranb, &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
                        scale, swork, &ldswork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
        lapack_int lda_t = MAX(1,m);
        lapack_int ldb_t = MAX(1,n);
        lapack_int ldc_t = MAX(1,m);
        lapack_complex_float* a_t = NULL;
        lapack_complex_float* b_t = NULL;
        lapack_complex_float* c_t = NULL;
        /* Check leading dimension(s) */
        if( lda < m ) {
            info = -8;
            LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
            return info;
        }
        if( ldb < n ) {
            info = -10;
            LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
            return info;
        }
        if( ldc < n ) {
            info = -12;
            LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
            return info;
        }
        /* Allocate memory for temporary array(s) */
        a_t = (lapack_complex_float*)
            LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,m) );
        if( a_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_0;
        }
        b_t = (lapack_complex_float*)
            LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,n) );
        if( b_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_1;
        }
        c_t = (lapack_complex_float*)
            LAPACKE_malloc( sizeof(lapack_complex_float) * ldc_t * MAX(1,n) );
        if( c_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_2;
        }
        /* Transpose input matrices */
        LAPACKE_cge_trans( matrix_layout, m, m, a, lda, a_t, lda_t );
        LAPACKE_cge_trans( matrix_layout, n, n, b, ldb, b_t, ldb_t );
        LAPACKE_cge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
        /* Call LAPACK function and adjust info */
        LAPACK_ctrsyl3( &trana, &tranb, &isgn, &m, &n, a_t, &lda_t, b_t, &ldb_t,
                        c_t, &ldc_t, scale, swork, &ldswork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
        /* Transpose output matrices */
        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc );
        /* Release memory and exit */
        LAPACKE_free( c_t );
 exit_level_2:
        LAPACKE_free( b_t );
 exit_level_1:
        LAPACKE_free( a_t );
 exit_level_0:
        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
            LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
        }
    } else {
        info = -1;
        LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
    }
    return info;
 }
--- a/lapack-netlib/LAPACKE/src/lapacke_dgesvdq.c
+++ b/lapack-netlib/LAPACKE/src/lapacke_dgesvdq.c
@@ -48,7 +48,6 @@ lapack_int LAPACKE_dgesvdq( int matrix_layout, char joba, char jobp,
    lapack_int lrwork = -1;
    double* rwork = NULL;
    double rwork_query;
    lapack_int i;
    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
        LAPACKE_xerbla( "LAPACKE_dgesvdq", -1 );
        return -1;
--- a/lapack-netlib/LAPACKE/src/lapacke_dtrsyl3.c
+++ b/lapack-netlib/LAPACKE/src/lapacke_dtrsyl3.c
@@ -0,0 +1,68 @@
 #include "lapacke_utils.h"
 lapack_int LAPACKE_dtrsyl3( int matrix_layout, char trana, char tranb,
                            lapack_int isgn, lapack_int m, lapack_int n,
                            const double* a, lapack_int lda, const double* b,
                            lapack_int ldb, double* c, lapack_int ldc,
                            double* scale )
 {
    lapack_int info = 0;
    double swork_query[2];
    double* swork = NULL;
    lapack_int ldswork = -1;
    lapack_int swork_size = -1;
    lapack_int iwork_query;
    lapack_int* iwork = NULL;
    lapack_int liwork = -1;
    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
        LAPACKE_xerbla( "LAPACKE_dtrsyl3", -1 );
        return -1;
    }
 #ifndef LAPACK_DISABLE_NAN_CHECK
    if( LAPACKE_get_nancheck() ) {
        /* Optionally check input matrices for NaNs */
        if( LAPACKE_dge_nancheck( matrix_layout, m, m, a, lda ) ) {
            return -7;
        }
        if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) {
            return -9;
        }
        if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) {
            return -11;
        }
    }
 #endif
    /* Query optimal working array sizes */
    info = LAPACKE_dtrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda,
                                 b, ldb, c, ldc, scale, &iwork_query, liwork,
                                 swork_query, ldswork );
    if( info != 0 ) {
        goto exit_level_0;
    }
    ldswork = swork_query[0];
    swork_size = ldswork * swork_query[1];
    swork = (double*)LAPACKE_malloc( sizeof(double) * swork_size);
    if( swork == NULL ) {
        info = LAPACK_WORK_MEMORY_ERROR;
        goto exit_level_0;
    }
    liwork = iwork_query;
    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
    if ( iwork == NULL ) {
        info = LAPACK_WORK_MEMORY_ERROR;
        goto exit_level_1;
    }
    /* Call middle-level interface */
    info = LAPACKE_dtrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a,
                                 lda, b, ldb, c, ldc, scale, iwork, liwork,
                                 swork, ldswork );
    /* Release memory and exit */
    LAPACKE_free( iwork );
 exit_level_1:
    LAPACKE_free( swork );
 exit_level_0:
    if( info == LAPACK_WORK_MEMORY_ERROR ) {
        LAPACKE_xerbla( "LAPACKE_dtrsyl3", info );
    }
    return info;
 }
--- a/lapack-netlib/LAPACKE/src/lapacke_dtrsyl3_work.c
+++ b/lapack-netlib/LAPACKE/src/lapacke_dtrsyl3_work.c
@@ -0,0 +1,86 @@
 #include "lapacke_utils.h"
 lapack_int LAPACKE_dtrsyl3_work( int matrix_layout, char trana, char tranb,
                                 lapack_int isgn, lapack_int m, lapack_int n,
                                 const double* a, lapack_int lda,
                                 const double* b, lapack_int ldb, double* c,
                                 lapack_int ldc, double* scale,
                                 lapack_int* iwork, lapack_int liwork,
                                 double* swork, lapack_int ldswork )
 {
    lapack_int info = 0;
    if( matrix_layout == LAPACK_COL_MAJOR ) {
        /* Call LAPACK function and adjust info */
        LAPACK_dtrsyl3( &trana, &tranb, &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
                        scale, iwork, &liwork, swork, &ldswork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
        lapack_int lda_t = MAX(1,m);
        lapack_int ldb_t = MAX(1,n);
        lapack_int ldc_t = MAX(1,m);
        double* a_t = NULL;
        double* b_t = NULL;
        double* c_t = NULL;
        /* Check leading dimension(s) */
        if( lda < m ) {
            info = -8;
            LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info );
            return info;
        }
        if( ldb < n ) {
            info = -10;
            LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info );
            return info;
        }
        if( ldc < n ) {
            info = -12;
            LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info );
            return info;
        }
        /* Allocate memory for temporary array(s) */
        a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,m) );
        if( a_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_0;
        }
        b_t = (double*)LAPACKE_malloc( sizeof(double) * ldb_t * MAX(1,n) );
        if( b_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_1;
        }
        c_t = (double*)LAPACKE_malloc( sizeof(double) * ldc_t * MAX(1,n) );
        if( c_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_2;
        }
        /* Transpose input matrices */
        LAPACKE_dge_trans( matrix_layout, m, m, a, lda, a_t, lda_t );
        LAPACKE_dge_trans( matrix_layout, n, n, b, ldb, b_t, ldb_t );
        LAPACKE_dge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
        /* Call LAPACK function and adjust info */
        LAPACK_dtrsyl3( &trana, &tranb, &isgn, &m, &n, a_t, &lda_t, b_t, &ldb_t,
                        c_t, &ldc_t, scale, iwork, &liwork, swork, &ldswork,
                        &info );
        if( info < 0 ) {
            info = info - 1;
        }
        /* Transpose output matrices */
        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc );
        /* Release memory and exit */
        LAPACKE_free( c_t );
 exit_level_2:
        LAPACKE_free( b_t );
 exit_level_1:
        LAPACKE_free( a_t );
 exit_level_0:
        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
            LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info );
        }
    } else {
        info = -1;
        LAPACKE_xerbla( "LAPACKE_dtrsyl3_work", info );
    }
    return info;
 }
--- a/lapack-netlib/LAPACKE/src/lapacke_sgesvdq.c
+++ b/lapack-netlib/LAPACKE/src/lapacke_sgesvdq.c
@@ -48,7 +48,6 @@ lapack_int LAPACKE_sgesvdq( int matrix_layout, char joba, char jobp,
    lapack_int lrwork = -1;
    float* rwork = NULL;
    float rwork_query;
    lapack_int i;
    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
        LAPACKE_xerbla( "LAPACKE_sgesvdq", -1 );
        return -1;
--- a/lapack-netlib/LAPACKE/src/lapacke_strsyl3.c
+++ b/lapack-netlib/LAPACKE/src/lapacke_strsyl3.c
@@ -0,0 +1,68 @@
 #include "lapacke_utils.h"
 lapack_int LAPACKE_strsyl3( int matrix_layout, char trana, char tranb,
                            lapack_int isgn, lapack_int m, lapack_int n,
                            const float* a, lapack_int lda, const float* b,
                            lapack_int ldb, float* c, lapack_int ldc,
                            float* scale )
 {
    lapack_int info = 0;
    float swork_query[2];
    float* swork = NULL;
    lapack_int ldswork = -1;
    lapack_int swork_size = -1;
    lapack_int iwork_query;
    lapack_int* iwork = NULL;
    lapack_int liwork = -1;
    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
        LAPACKE_xerbla( "LAPACKE_strsyl3", -1 );
        return -1;
    }
 #ifndef LAPACK_DISABLE_NAN_CHECK
    if( LAPACKE_get_nancheck() ) {
        /* Optionally check input matrices for NaNs */
        if( LAPACKE_sge_nancheck( matrix_layout, m, m, a, lda ) ) {
            return -7;
        }
        if( LAPACKE_sge_nancheck( matrix_layout, n, n, b, ldb ) ) {
            return -9;
        }
        if( LAPACKE_sge_nancheck( matrix_layout, m, n, c, ldc ) ) {
            return -11;
        }
    }
 #endif
    /* Query optimal working array sizes */
    info = LAPACKE_strsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda,
                                 b, ldb, c, ldc, scale, &iwork_query, liwork,
                                 swork_query, ldswork );
    if( info != 0 ) {
        goto exit_level_0;
    }
    ldswork = swork_query[0];
    swork_size = ldswork * swork_query[1];
    swork = (float*)LAPACKE_malloc( sizeof(float) * swork_size);
    if( swork == NULL ) {
        info = LAPACK_WORK_MEMORY_ERROR;
        goto exit_level_0;
    }
    liwork = iwork_query;
    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
    if ( iwork == NULL ) {
        info = LAPACK_WORK_MEMORY_ERROR;
        goto exit_level_1;
    }
    /* Call middle-level interface */
    info = LAPACKE_strsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a,
                                 lda, b, ldb, c, ldc, scale, iwork, liwork,
                                 swork, ldswork );
    /* Release memory and exit */
    LAPACKE_free( iwork );
 exit_level_1:
    LAPACKE_free( swork );
 exit_level_0:
    if( info == LAPACK_WORK_MEMORY_ERROR ) {
        LAPACKE_xerbla( "LAPACKE_strsyl3", info );
    }
    return info;
 }
--- a/lapack-netlib/LAPACKE/src/lapacke_strsyl3_work.c
+++ b/lapack-netlib/LAPACKE/src/lapacke_strsyl3_work.c
@@ -0,0 +1,86 @@
 #include "lapacke_utils.h"
 lapack_int LAPACKE_strsyl3_work( int matrix_layout, char trana, char tranb,
                                 lapack_int isgn, lapack_int m, lapack_int n,
                                 const float* a, lapack_int lda,
                                 const float* b, lapack_int ldb, float* c,
                                 lapack_int ldc, float* scale,
                                 lapack_int* iwork, lapack_int liwork,
                                 float* swork, lapack_int ldswork )
 {
    lapack_int info = 0;
    if( matrix_layout == LAPACK_COL_MAJOR ) {
        /* Call LAPACK function and adjust info */
        LAPACK_strsyl3( &trana, &tranb, &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
                        scale, iwork, &liwork, swork, &ldswork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
        lapack_int lda_t = MAX(1,m);
        lapack_int ldb_t = MAX(1,n);
        lapack_int ldc_t = MAX(1,m);
        float* a_t = NULL;
        float* b_t = NULL;
        float* c_t = NULL;
        /* Check leading dimension(s) */
        if( lda < m ) {
            info = -8;
            LAPACKE_xerbla( "LAPACKE_strsyl3_work", info );
            return info;
        }
        if( ldb < n ) {
            info = -10;
            LAPACKE_xerbla( "LAPACKE_strsyl3_work", info );
            return info;
        }
        if( ldc < n ) {
            info = -12;
            LAPACKE_xerbla( "LAPACKE_strsyl3_work", info );
            return info;
        }
        /* Allocate memory for temporary array(s) */
        a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,m) );
        if( a_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_0;
        }
        b_t = (float*)LAPACKE_malloc( sizeof(float) * ldb_t * MAX(1,n) );
        if( b_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_1;
        }
        c_t = (float*)LAPACKE_malloc( sizeof(float) * ldc_t * MAX(1,n) );
        if( c_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_2;
        }
        /* Transpose input matrices */
        LAPACKE_sge_trans( matrix_layout, m, m, a, lda, a_t, lda_t );
        LAPACKE_sge_trans( matrix_layout, n, n, b, ldb, b_t, ldb_t );
        LAPACKE_sge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
        /* Call LAPACK function and adjust info */
        LAPACK_strsyl3( &trana, &tranb, &isgn, &m, &n, a_t, &lda_t, b_t, &ldb_t,
                        c_t, &ldc_t, scale, iwork, &liwork, swork, &ldswork,
                        &info );
        if( info < 0 ) {
            info = info - 1;
        }
        /* Transpose output matrices */
        LAPACKE_sge_trans( LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc );
        /* Release memory and exit */
        LAPACKE_free( c_t );
 exit_level_2:
        LAPACKE_free( b_t );
 exit_level_1:
        LAPACKE_free( a_t );
 exit_level_0:
        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
            LAPACKE_xerbla( "LAPACKE_strsyl3_work", info );
        }
    } else {
        info = -1;
        LAPACKE_xerbla( "LAPACKE_strsyl3_work", info );
    }
    return info;
 }
--- a/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c
+++ b/lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c
@@ -48,7 +48,6 @@ lapack_int LAPACKE_zgesvdq( int matrix_layout, char joba, char jobp,
    lapack_int lrwork = -1;
    double* rwork = NULL;
    double rwork_query;
    lapack_int i;
    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
        LAPACKE_xerbla( "LAPACKE_zgesvdq", -1 );
        return -1;
--- a/lapack-netlib/LAPACKE/src/lapacke_ztrsyl3.c
+++ b/lapack-netlib/LAPACKE/src/lapacke_ztrsyl3.c
@@ -0,0 +1,56 @@
 #include "lapacke_utils.h"
 lapack_int LAPACKE_ztrsyl3( int matrix_layout, char trana, char tranb,
                            lapack_int isgn, lapack_int m, lapack_int n,
                            const lapack_complex_double* a, lapack_int lda,
                            const lapack_complex_double* b, lapack_int ldb,
                            lapack_complex_double* c, lapack_int ldc,
                            double* scale )
 {
    lapack_int info = 0;
    double swork_query[2];
    double* swork = NULL;
    lapack_int ldswork = -1;
    lapack_int swork_size = -1;
    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
        LAPACKE_xerbla( "LAPACKE_ztrsyl3", -1 );
        return -1;
    }
 #ifndef LAPACK_DISABLE_NAN_CHECK
    if( LAPACKE_get_nancheck() ) {
        /* Optionally check input matrices for NaNs */
        if( LAPACKE_zge_nancheck( matrix_layout, m, m, a, lda ) ) {
            return -7;
        }
        if( LAPACKE_zge_nancheck( matrix_layout, n, n, b, ldb ) ) {
            return -9;
        }
        if( LAPACKE_zge_nancheck( matrix_layout, m, n, c, ldc ) ) {
            return -11;
        }
    }
 #endif
    /* Query optimal working array sizes */
    info = LAPACKE_ztrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda,
                                 b, ldb, c, ldc, scale, swork_query, ldswork );
    if( info != 0 ) {
        goto exit_level_0;
    }
    ldswork = swork_query[0];
    swork_size = ldswork * swork_query[1];
    swork = (double*)LAPACKE_malloc( sizeof(double) * swork_size);
    if( swork == NULL ) {
        info = LAPACK_WORK_MEMORY_ERROR;
        goto exit_level_0;
    }
    /* Call middle-level interface */
    info = LAPACKE_ztrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a,
                                 lda, b, ldb, c, ldc, scale, swork, ldswork );
    /* Release memory and exit */
    LAPACKE_free( swork );
 exit_level_0:
    if( info == LAPACK_WORK_MEMORY_ERROR ) {
        LAPACKE_xerbla( "LAPACKE_ztrsyl3", info );
    }
    return info;
 }
--- a/lapack-netlib/LAPACKE/src/lapacke_ztrsyl3_work.c
+++ b/lapack-netlib/LAPACKE/src/lapacke_ztrsyl3_work.c
@@ -0,0 +1,88 @@
 #include "lapacke_utils.h"
 lapack_int LAPACKE_ztrsyl3_work( int matrix_layout, char trana, char tranb,
                                 lapack_int isgn, lapack_int m, lapack_int n,
                                 const lapack_complex_double* a, lapack_int lda,
                                 const lapack_complex_double* b, lapack_int ldb,
                                 lapack_complex_double* c, lapack_int ldc,
                                 double* scale, double* swork,
                                 lapack_int ldswork )
 {
    lapack_int info = 0;
    if( matrix_layout == LAPACK_COL_MAJOR ) {
        /* Call LAPACK function and adjust info */
        LAPACK_ztrsyl3( &trana, &tranb, &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
                        scale, swork, &ldswork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
        lapack_int lda_t = MAX(1,m);
        lapack_int ldb_t = MAX(1,n);
        lapack_int ldc_t = MAX(1,m);
        lapack_complex_double* a_t = NULL;
        lapack_complex_double* b_t = NULL;
        lapack_complex_double* c_t = NULL;
        /* Check leading dimension(s) */
        if( lda < m ) {
            info = -8;
            LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info );
            return info;
        }
        if( ldb < n ) {
            info = -10;
            LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info );
            return info;
        }
        if( ldc < n ) {
            info = -12;
            LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info );
            return info;
        }
        /* Allocate memory for temporary array(s) */
        a_t = (lapack_complex_double*)
            LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,m) );
        if( a_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_0;
        }
        b_t = (lapack_complex_double*)
            LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,n) );
        if( b_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_1;
        }
        c_t = (lapack_complex_double*)
            LAPACKE_malloc( sizeof(lapack_complex_double) * ldc_t * MAX(1,n) );
        if( c_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_2;
        }
        /* Transpose input matrices */
        LAPACKE_zge_trans( matrix_layout, m, m, a, lda, a_t, lda_t );
        LAPACKE_zge_trans( matrix_layout, n, n, b, ldb, b_t, ldb_t );
        LAPACKE_zge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
        /* Call LAPACK function and adjust info */
        LAPACK_ztrsyl3( &trana, &tranb, &isgn, &m, &n, a_t, &lda_t, b_t, &ldb_t,
                        c_t, &ldc_t, scale, swork, &ldswork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
        /* Transpose output matrices */
        LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc );
        /* Release memory and exit */
        LAPACKE_free( c_t );
 exit_level_2:
        LAPACKE_free( b_t );
 exit_level_1:
        LAPACKE_free( a_t );
 exit_level_0:
        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
            LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info );
        }
    } else {
        info = -1;
        LAPACKE_xerbla( "LAPACKE_ztrsyl3_work", info );
    }
    return info;
 }
--- a/lapack-netlib/SRC/Makefile
+++ b/lapack-netlib/SRC/Makefile
@@ -207,7 +207,7 @@ SLASRC_O = \
   ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \
   ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \
   ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \
   sgesvdq.o 
   sgesvdq.o slarmm.o slatrs3.o strsyl3.o 
 endif
@@ -316,7 +316,7 @@ CLASRC_O = \
   chetrd_2stage.o chetrd_he2hb.o chetrd_hb2st.o chb2st_kernels.o \
   cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \
   chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o \
   cgesvdq.o
   cgesvdq.o clatrs3.o ctrsyl3.o
 endif
 ifdef USEXBLAS
@@ -417,7 +417,7 @@ DLASRC_O = \
   dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \
   dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \
   dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \
   dgesvdq.o 
   dgesvdq.o dlarmm.o dlatrs3.o dtrsyl3.o
 endif
 ifdef USEXBLAS
@@ -526,7 +526,7 @@ ZLASRC_O = \
   zhetrd_2stage.o zhetrd_he2hb.o zhetrd_hb2st.o zhb2st_kernels.o \
   zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \
   zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o \
   zgesvdq.o
   zgesvdq.o zlatrs3.o ztrsyl3.o
 endif
 ifdef USEXBLAS
--- a/lapack-netlib/SRC/clatrs3.c
+++ b/lapack-netlib/SRC/clatrs3.c
--- a/lapack-netlib/SRC/clatrs3.f
+++ b/lapack-netlib/SRC/clatrs3.f
@@ -0,0 +1,666 @@
 *> \brief \b CLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow.
 *
 *  Definition:
 *  ===========
 *
 *      SUBROUTINE CLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
 *                          X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          DIAG, NORMIN, TRANS, UPLO
 *       INTEGER            INFO, LDA, LWORK, LDX, N, NRHS
 *       ..
 *       .. Array Arguments ..
 *       REAL               CNORM( * ), SCALE( * ), WORK( * )
 *       COMPLEX            A( LDA, * ), X( LDX, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> CLATRS3 solves one of the triangular systems
 *>
 *>    A * X = B * diag(scale),  A**T * X = B * diag(scale), or
 *>    A**H * X = B * diag(scale)
 *>
 *> with scaling to prevent overflow.  Here A is an upper or lower
 *> triangular matrix, A**T denotes the transpose of A, A**H denotes the
 *> conjugate transpose of A. X and B are n-by-nrhs matrices and scale
 *> is an nrhs-element vector of scaling factors. A scaling factor scale(j)
 *> is usually less than or equal to 1, chosen such that X(:,j) is less
 *> than the overflow threshold. If the matrix A is singular (A(j,j) = 0
 *> for some j), then a non-trivial solution to A*X = 0 is returned. If
 *> the system is so badly scaled that the solution cannot be represented
 *> as (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned.
 *>
 *> This is a BLAS-3 version of LATRS for solving several right
 *> hand sides simultaneously.
 *>
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] UPLO
 *> \verbatim
 *>          UPLO is CHARACTER*1
 *>          Specifies whether the matrix A is upper or lower triangular.
 *>          = 'U':  Upper triangular
 *>          = 'L':  Lower triangular
 *> \endverbatim
 *>
 *> \param[in] TRANS
 *> \verbatim
 *>          TRANS is CHARACTER*1
 *>          Specifies the operation applied to A.
 *>          = 'N':  Solve A * x = s*b  (No transpose)
 *>          = 'T':  Solve A**T* x = s*b  (Transpose)
 *>          = 'C':  Solve A**T* x = s*b  (Conjugate transpose)
 *> \endverbatim
 *>
 *> \param[in] DIAG
 *> \verbatim
 *>          DIAG is CHARACTER*1
 *>          Specifies whether or not the matrix A is unit triangular.
 *>          = 'N':  Non-unit triangular
 *>          = 'U':  Unit triangular
 *> \endverbatim
 *>
 *> \param[in] NORMIN
 *> \verbatim
 *>          NORMIN is CHARACTER*1
 *>          Specifies whether CNORM has been set or not.
 *>          = 'Y':  CNORM contains the column norms on entry
 *>          = 'N':  CNORM is not set on entry.  On exit, the norms will
 *>                  be computed and stored in CNORM.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The order of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] NRHS
 *> \verbatim
 *>          NRHS is INTEGER
 *>          The number of columns of X.  NRHS >= 0.
 *> \endverbatim
 *>
 *> \param[in] A
 *> \verbatim
 *>          A is COMPLEX array, dimension (LDA,N)
 *>          The triangular matrix A.  If UPLO = 'U', the leading n by n
 *>          upper triangular part of the array A contains the upper
 *>          triangular matrix, and the strictly lower triangular part of
 *>          A is not referenced.  If UPLO = 'L', the leading n by n lower
 *>          triangular part of the array A contains the lower triangular
 *>          matrix, and the strictly upper triangular part of A is not
 *>          referenced.  If DIAG = 'U', the diagonal elements of A are
 *>          also not referenced and are assumed to be 1.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max (1,N).
 *> \endverbatim
 *>
 *> \param[in,out] X
 *> \verbatim
 *>          X is COMPLEX array, dimension (LDX,NRHS)
 *>          On entry, the right hand side B of the triangular system.
 *>          On exit, X is overwritten by the solution matrix X.
 *> \endverbatim
 *>
 *> \param[in] LDX
 *> \verbatim
 *>          LDX is INTEGER
 *>          The leading dimension of the array X.  LDX >= max (1,N).
 *> \endverbatim
 *>
 *> \param[out] SCALE
 *> \verbatim
 *>          SCALE is REAL array, dimension (NRHS)
 *>          The scaling factor s(k) is for the triangular system
 *>          A * x(:,k) = s(k)*b(:,k)  or  A**T* x(:,k) = s(k)*b(:,k).
 *>          If SCALE = 0, the matrix A is singular or badly scaled.
 *>          If A(j,j) = 0 is encountered, a non-trivial vector x(:,k)
 *>          that is an exact or approximate solution to A*x(:,k) = 0
 *>          is returned. If the system so badly scaled that solution
 *>          cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0
 *>          is returned.
 *> \endverbatim
 *>
 *> \param[in,out] CNORM
 *> \verbatim
 *>          CNORM is REAL array, dimension (N)
 *>
 *>          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
 *>          contains the norm of the off-diagonal part of the j-th column
 *>          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
 *>          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
 *>          must be greater than or equal to the 1-norm.
 *>
 *>          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
 *>          returns the 1-norm of the offdiagonal part of the j-th column
 *>          of A.
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is REAL array, dimension (LWORK).
 *>          On exit, if INFO = 0, WORK(1) returns the optimal size of
 *>          WORK.
 *> \endverbatim
 *>
 *> \param[in] LWORK
 *>          LWORK is INTEGER
 *>          LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where
 *>          NBA = (N + NB - 1)/NB and NB is the optimal block size.
 *>
 *>          If LWORK = -1, then a workspace query is assumed; the routine
 *>          only calculates the optimal dimensions of the WORK array, returns
 *>          this value as the first entry of the WORK array, and no error
 *>          message related to LWORK is issued by XERBLA.
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -k, the k-th argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleOTHERauxiliary
 *> \par Further Details:
 *  =====================
 *  \verbatim
 *  The algorithm follows the structure of a block triangular solve.
 *  The diagonal block is solved with a call to the robust the triangular
 *  solver LATRS for every right-hand side RHS = 1, ..., NRHS
 *     op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ),
 *  where op( A ) = A or op( A ) = A**T or op( A ) = A**H.
 *  The linear block updates operate on block columns of X,
 *     B( I, K ) - op(A( I, J )) * X( J, K )
 *  and use GEMM. To avoid overflow in the linear block update, the worst case
 *  growth is estimated. For every RHS, a scale factor s <= 1.0 is computed
 *  such that
 *     || s * B( I, RHS )||_oo
 *   + || op(A( I, J )) ||_oo * || s *  X( J, RHS ) ||_oo <= Overflow threshold
 *
 *  Once all columns of a block column have been rescaled (BLAS-1), the linear
 *  update is executed with GEMM without overflow.
 *
 *  To limit rescaling, local scale factors track the scaling of column segments.
 *  There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA
 *  per right-hand side column RHS = 1, ..., NRHS. The global scale factor
 *  SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS )
 *  I = 1, ..., NBA.
 *  A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS )
 *  updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The
 *  linear update of potentially inconsistently scaled vector segments
 *     s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) )
 *  computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and,
 *  if necessary, rescales the blocks prior to calling GEMM.
 *
 *  \endverbatim
 *  =====================================================================
 *  References:
 *  C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019).
 *  Parallel robust solution of triangular linear systems. Concurrency
 *  and Computation: Practice and Experience, 31(19), e5064.
 *
 *  Contributor:
 *   Angelika Schwarz, Umea University, Sweden.
 *
 *  =====================================================================
      SUBROUTINE CLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
     $                    X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
      IMPLICIT NONE
 *
 *     .. Scalar Arguments ..
      CHARACTER          DIAG, TRANS, NORMIN, UPLO
      INTEGER            INFO, LDA, LWORK, LDX, N, NRHS
 *     ..
 *     .. Array Arguments ..
      COMPLEX            A( LDA, * ), X( LDX, * )
      REAL               CNORM( * ), SCALE( * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      REAL               ZERO, ONE
      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
      COMPLEX            CZERO, CONE
      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ) )
      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
      INTEGER            NBMAX, NBMIN, NBRHS, NRHSMIN
      PARAMETER          ( NRHSMIN = 2, NBRHS = 32 )
      PARAMETER          ( NBMIN = 8, NBMAX = 64 )
 *     ..
 *     .. Local Arrays ..
      REAL               W( NBMAX ), XNRM( NBRHS )
 *     ..
 *     .. Local Scalars ..
      LOGICAL            LQUERY, NOTRAN, NOUNIT, UPPER
      INTEGER            AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J,
     $                   JFIRST, JINC, JLAST, J1, J2, K, KK, K1, K2,
     $                   LANRM, LDS, LSCALE, NB, NBA, NBX, RHS
      REAL               ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC,
     $                   SCAMIN, SMLNUM, TMAX
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ILAENV
      REAL               SLAMCH, CLANGE, SLARMM
      EXTERNAL           ILAENV, LSAME, SLAMCH, CLANGE, SLARMM
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           CLATRS, CSSCAL, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
      INFO = 0
      UPPER = LSAME( UPLO, 'U' )
      NOTRAN = LSAME( TRANS, 'N' )
      NOUNIT = LSAME( DIAG, 'N' )
      LQUERY = ( LWORK.EQ.-1 )
 *
 *     Partition A and X into blocks.
 *
      NB = MAX( NBMIN, ILAENV( 1, 'CLATRS', '', N, N, -1, -1 ) )
      NB = MIN( NBMAX, NB )
      NBA = MAX( 1, (N + NB - 1) / NB )
      NBX = MAX( 1, (NRHS + NBRHS - 1) / NBRHS )
 *
 *     Compute the workspace
 *
 *     The workspace comprises two parts.
 *     The first part stores the local scale factors. Each simultaneously
 *     computed right-hand side requires one local scale factor per block
 *     row. WORK( I + KK * LDS ) is the scale factor of the vector
 *     segment associated with the I-th block row and the KK-th vector
 *     in the block column.
      LSCALE = NBA * MAX( NBA, MIN( NRHS, NBRHS ) )
      LDS = NBA
 *     The second part stores upper bounds of the triangular A. There are
 *     a total of NBA x NBA blocks, of which only the upper triangular
 *     part or the lower triangular part is referenced. The upper bound of
 *     the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ).
      LANRM = NBA * NBA
      AWRK = LSCALE
      WORK( 1 ) = LSCALE + LANRM
 *
 *     Test the input parameters.
 *
      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
         INFO = -1
      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
     $         LSAME( TRANS, 'C' ) ) THEN
         INFO = -2
      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
         INFO = -3
      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
     $         LSAME( NORMIN, 'N' ) ) THEN
         INFO = -4
      ELSE IF( N.LT.0 ) THEN
         INFO = -5
      ELSE IF( NRHS.LT.0 ) THEN
         INFO = -6
      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
         INFO = -8
      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
         INFO = -10
      ELSE IF( .NOT.LQUERY .AND. LWORK.LT.WORK( 1 ) ) THEN
         INFO = -14
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'CLATRS3', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
 *
 *     Initialize scaling factors
 *
      DO KK = 1, NRHS
         SCALE( KK ) = ONE
      END DO
 *
 *     Quick return if possible
 *
      IF( MIN( N, NRHS ).EQ.0 )
     $   RETURN
 *
 *     Determine machine dependent constant to control overflow.
 *
      BIGNUM = SLAMCH( 'Overflow' )
      SMLNUM = SLAMCH( 'Safe Minimum' )
 *
 *     Use unblocked code for small problems
 *
      IF( NRHS.LT.NRHSMIN ) THEN
         CALL CLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X( 1, 1 ),
     $                SCALE( 1 ), CNORM, INFO )
         DO K = 2, NRHS
            CALL CLATRS( UPLO, TRANS, DIAG, 'Y', N, A, LDA, X( 1, K ),
     $                   SCALE( K ), CNORM, INFO )
         END DO
         RETURN
      END IF
 *
 *     Compute norms of blocks of A excluding diagonal blocks and find
 *     the block with the largest norm TMAX.
 *
      TMAX = ZERO
      DO J = 1, NBA
         J1 = (J-1)*NB + 1
         J2 = MIN( J*NB, N ) + 1
         IF ( UPPER ) THEN
            IFIRST = 1
            ILAST = J - 1
         ELSE
            IFIRST = J + 1
            ILAST = NBA
         END IF
         DO I = IFIRST, ILAST
            I1 = (I-1)*NB + 1
            I2 = MIN( I*NB, N ) + 1
 *
 *           Compute upper bound of A( I1:I2-1, J1:J2-1 ).
 *
            IF( NOTRAN ) THEN
               ANRM = CLANGE( 'I', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
               WORK( AWRK + I+(J-1)*NBA ) = ANRM
            ELSE
               ANRM = CLANGE( '1', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
               WORK( AWRK + J+(I-1)*NBA ) = ANRM
            END IF
            TMAX = MAX( TMAX, ANRM )
         END DO
      END DO
 *
      IF( .NOT. TMAX.LE.SLAMCH('Overflow') ) THEN
 *
 *        Some matrix entries have huge absolute value. At least one upper
 *        bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point
 *        number, either due to overflow in LANGE or due to Inf in A.
 *        Fall back to LATRS. Set normin = 'N' for every right-hand side to
 *        force computation of TSCAL in LATRS to avoid the likely overflow
 *        in the computation of the column norms CNORM.
 *
         DO K = 1, NRHS
            CALL CLATRS( UPLO, TRANS, DIAG, 'N', N, A, LDA, X( 1, K ),
     $                   SCALE( K ), CNORM, INFO )
         END DO
         RETURN
      END IF
 *
 *     Every right-hand side requires workspace to store NBA local scale
 *     factors. To save workspace, X is computed successively in block columns
 *     of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient
 *     workspace is available, larger values of NBRHS or NBRHS = NRHS are viable.
      DO K = 1, NBX
 *        Loop over block columns (index = K) of X and, for column-wise scalings,
 *        over individual columns (index = KK).
 *        K1: column index of the first column in X( J, K )
 *        K2: column index of the first column in X( J, K+1 )
 *        so the K2 - K1 is the column count of the block X( J, K )
         K1 = (K-1)*NBRHS + 1
         K2 = MIN( K*NBRHS, NRHS ) + 1
 *
 *        Initialize local scaling factors of current block column X( J, K )
 *
         DO KK = 1, K2-K1
            DO I = 1, NBA
               WORK( I+KK*LDS ) = ONE
            END DO
         END DO
 *
         IF( NOTRAN ) THEN
 *
 *           Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
 *
            IF( UPPER ) THEN
               JFIRST = NBA
               JLAST = 1
               JINC = -1
            ELSE
               JFIRST = 1
               JLAST = NBA
               JINC = 1
            END IF
         ELSE
 *
 *           Solve op(A) * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
 *           where op(A) = A**T or op(A) = A**H
 *
            IF( UPPER ) THEN
               JFIRST = 1
               JLAST = NBA
               JINC = 1
            ELSE
               JFIRST = NBA
               JLAST = 1
               JINC = -1
            END IF
         END IF
         DO J = JFIRST, JLAST, JINC
 *           J1: row index of the first row in A( J, J )
 *           J2: row index of the first row in A( J+1, J+1 )
 *           so that J2 - J1 is the row count of the block A( J, J )
            J1 = (J-1)*NB + 1
            J2 = MIN( J*NB, N ) + 1
 *
 *           Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS )
 *
            DO KK = 1, K2-K1
               RHS = K1 + KK - 1
               IF( KK.EQ.1 ) THEN
                  CALL CLATRS( UPLO, TRANS, DIAG, 'N', J2-J1,
     $                         A( J1, J1 ), LDA, X( J1, RHS ),
     $                         SCALOC, CNORM, INFO )
               ELSE
                  CALL CLATRS( UPLO, TRANS, DIAG, 'Y', J2-J1,
     $                         A( J1, J1 ), LDA, X( J1, RHS ),
     $                         SCALOC, CNORM, INFO )
               END IF
 *              Find largest absolute value entry in the vector segment
 *              X( J1:J2-1, RHS ) as an upper bound for the worst case
 *              growth in the linear updates.
               XNRM( KK ) = CLANGE( 'I', J2-J1, 1, X( J1, RHS ),
     $                              LDX, W )
 *
               IF( SCALOC .EQ. ZERO ) THEN
 *                 LATRS found that A is singular through A(j,j) = 0.
 *                 Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0
 *                 and compute op(A)*x = 0. Note that X(J1:J2-1, KK) is
 *                 set by LATRS.
                  SCALE( RHS ) = ZERO
                  DO II = 1, J1-1
                     X( II, KK ) = CZERO
                  END DO
                  DO II = J2, N
                     X( II, KK ) = CZERO
                  END DO
 *                 Discard the local scale factors.
                  DO II = 1, NBA
                     WORK( II+KK*LDS ) = ONE
                  END DO
                  SCALOC = ONE
               ELSE IF( SCALOC*WORK( J+KK*LDS ) .EQ. ZERO ) THEN
 *                 LATRS computed a valid scale factor, but combined with
 *                 the current scaling the solution does not have a
 *                 scale factor > 0.
 *
 *                 Set WORK( J+KK*LDS ) to smallest valid scale
 *                 factor and increase SCALOC accordingly.
                  SCAL = WORK( J+KK*LDS ) / SMLNUM
                  SCALOC = SCALOC * SCAL
                  WORK( J+KK*LDS ) = SMLNUM
 *                 If LATRS overestimated the growth, x may be
 *                 rescaled to preserve a valid combined scale
 *                 factor WORK( J, KK ) > 0.
                  RSCAL = ONE / SCALOC
                  IF( XNRM( KK )*RSCAL .LE. BIGNUM ) THEN
                     XNRM( KK ) = XNRM( KK ) * RSCAL
                     CALL CSSCAL( J2-J1, RSCAL, X( J1, RHS ), 1 )
                     SCALOC = ONE
                  ELSE
 *                    The system op(A) * x = b is badly scaled and its
 *                    solution cannot be represented as (1/scale) * x.
 *                    Set x to zero. This approach deviates from LATRS
 *                    where a completely meaningless non-zero vector
 *                    is returned that is not a solution to op(A) * x = b.
                     SCALE( RHS ) = ZERO
                     DO II = 1, N
                        X( II, KK ) = CZERO
                     END DO
 *                    Discard the local scale factors.
                     DO II = 1, NBA
                        WORK( II+KK*LDS ) = ONE
                     END DO
                     SCALOC = ONE
                  END IF
               END IF
               SCALOC = SCALOC * WORK( J+KK*LDS )
               WORK( J+KK*LDS ) = SCALOC
            END DO
 *
 *           Linear block updates
 *
            IF( NOTRAN ) THEN
               IF( UPPER ) THEN
                  IFIRST = J - 1
                  ILAST = 1
                  IINC = -1
               ELSE
                  IFIRST = J + 1
                  ILAST = NBA
                  IINC = 1
               END IF
            ELSE
               IF( UPPER ) THEN
                  IFIRST = J + 1
                  ILAST = NBA
                  IINC = 1
               ELSE
                  IFIRST = J - 1
                  ILAST = 1
                  IINC = -1
               END IF
            END IF
 *
            DO I = IFIRST, ILAST, IINC
 *              I1: row index of the first column in X( I, K )
 *              I2: row index of the first column in X( I+1, K )
 *              so the I2 - I1 is the row count of the block X( I, K )
               I1 = (I-1)*NB + 1
               I2 = MIN( I*NB, N ) + 1
 *
 *              Prepare the linear update to be executed with GEMM.
 *              For each column, compute a consistent scaling, a
 *              scaling factor to survive the linear update, and
 *              rescale the column segments, if necesssary. Then
 *              the linear update is safely executed.
 *
               DO KK = 1, K2-K1
                  RHS = K1 + KK - 1
 *                 Compute consistent scaling
                  SCAMIN = MIN( WORK( I+KK*LDS), WORK( J+KK*LDS ) )
 *
 *                 Compute scaling factor to survive the linear update
 *                 simulating consistent scaling.
 *
                  BNRM = CLANGE( 'I', I2-I1, 1, X( I1, RHS ), LDX, W )
                  BNRM = BNRM*( SCAMIN / WORK( I+KK*LDS ) )
                  XNRM( KK ) = XNRM( KK )*( SCAMIN / WORK( J+KK*LDS) )
                  ANRM = WORK( AWRK + I+(J-1)*NBA )
                  SCALOC = SLARMM( ANRM, XNRM( KK ), BNRM )
 *
 *                 Simultaneously apply the robust update factor and the
 *                 consistency scaling factor to X( I, KK ) and X( J, KK ).
 *
                  SCAL = ( SCAMIN / WORK( I+KK*LDS) )*SCALOC
                  IF( SCAL.NE.ONE ) THEN
                     CALL CSSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
                     WORK( I+KK*LDS ) = SCAMIN*SCALOC
                  END IF
 *
                  SCAL = ( SCAMIN / WORK( J+KK*LDS ) )*SCALOC
                  IF( SCAL.NE.ONE ) THEN
                     CALL CSSCAL( J2-J1, SCAL, X( J1, RHS ), 1 )
                     WORK( J+KK*LDS ) = SCAMIN*SCALOC
                  END IF
               END DO
 *
               IF( NOTRAN ) THEN
 *
 *                 B( I, K ) := B( I, K ) - A( I, J ) * X( J, K )
 *
                  CALL CGEMM( 'N', 'N', I2-I1, K2-K1, J2-J1, -CONE,
     $                        A( I1, J1 ), LDA, X( J1, K1 ), LDX,
     $                        CONE, X( I1, K1 ), LDX )
               ELSE IF( LSAME( TRANS, 'T' ) ) THEN
 *
 *                 B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K )
 *
                  CALL CGEMM( 'T', 'N', I2-I1, K2-K1, J2-J1, -CONE,
     $                        A( J1, I1 ), LDA, X( J1, K1 ), LDX,
     $                        CONE, X( I1, K1 ), LDX )
               ELSE
 *
 *                 B( I, K ) := B( I, K ) - A( I, J )**H * X( J, K )
 *
                  CALL CGEMM( 'C', 'N', I2-I1, K2-K1, J2-J1, -CONE,
     $                        A( J1, I1 ), LDA, X( J1, K1 ), LDX,
     $                        CONE, X( I1, K1 ), LDX )
               END IF
            END DO
         END DO
 *
 *        Reduce local scaling factors
 *
         DO KK = 1, K2-K1
            RHS = K1 + KK - 1
            DO I = 1, NBA
               SCALE( RHS ) = MIN( SCALE( RHS ), WORK( I+KK*LDS ) )
            END DO
         END DO
 *
 *        Realize consistent scaling
 *
         DO KK = 1, K2-K1
            RHS = K1 + KK - 1
            IF( SCALE( RHS ).NE.ONE .AND. SCALE( RHS ).NE. ZERO ) THEN
               DO I = 1, NBA
                  I1 = (I-1)*NB + 1
                  I2 = MIN( I*NB, N ) + 1
                  SCAL = SCALE( RHS ) / WORK( I+KK*LDS )
                  IF( SCAL.NE.ONE )
     $               CALL CSSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
               END DO
            END IF
         END DO
      END DO
      RETURN
 *
 *     End of CLATRS3
 *
      END
--- a/lapack-netlib/SRC/ctrsyl3.c
+++ b/lapack-netlib/SRC/ctrsyl3.c
--- a/lapack-netlib/SRC/ctrsyl3.f
+++ b/lapack-netlib/SRC/ctrsyl3.f
--- a/lapack-netlib/SRC/dlarmm.c
+++ b/lapack-netlib/SRC/dlarmm.c
@@ -0,0 +1,605 @@
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 #if defined(_WIN64)
 typedef long long BLASLONG;
 typedef unsigned long long BLASULONG;
 #else
 typedef long BLASLONG;
 typedef unsigned long BLASULONG;
 #endif
 #ifdef LAPACK_ILP64
 typedef BLASLONG blasint;
 #if defined(_WIN64)
 #define blasabs(x) llabs(x)
 #else
 #define blasabs(x) labs(x)
 #endif
 #else
 typedef int blasint;
 #define blasabs(x) abs(x)
 #endif
 typedef blasint integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 #ifdef _MSC_VER
 static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
 static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
 static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
 static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
 #else
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #endif
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #ifdef _MSC_VER
 #define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
 #define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
 #else
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #endif
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimagf(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle_() continue;
 #define myceiling_(w) {ceil(w)}
 #define myhuge_(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
 #define myexp_(w) my_expfunc(w)
 static int my_expfunc(double *x) {int e; (void)frexp(*x,&e); return e;}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 #ifdef _MSC_VER
 static _Fcomplex cpow_ui(complex x, integer n) {
 	complex pow={1.0,0.0}; unsigned long int u;
 		if(n != 0) {
 		if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
 		for(u = n; ; ) {
 			if(u & 01) pow.r *= x.r, pow.i *= x.i;
 			if(u >>= 1) x.r *= x.r, x.i *= x.i;
 			else break;
 		}
 	}
 	_Fcomplex p={pow.r, pow.i};
 	return p;
 }
 #else
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 #endif
 #ifdef _MSC_VER
 static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
 	_Dcomplex pow={1.0,0.0}; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
 		for(u = n; ; ) {
 			if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
 			if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
 			else break;
 		}
 	}
 	_Dcomplex p = {pow._Val[0], pow._Val[1]};
 	return p;
 }
 #else
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 #endif
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 #ifdef _MSC_VER
 	_Fcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
 			zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
 			zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
 		}
 	}
 	pCf(z) = zdotc;
 }
 #else
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 #endif
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 #ifdef _MSC_VER
 	_Dcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
 			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
 			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
 		}
 	}
 	pCd(z) = zdotc;
 }
 #else
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 #ifdef _MSC_VER
 	_Fcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
 			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
 			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
 		}
 	}
 	pCf(z) = zdotc;
 }
 #else
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 #endif
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 #ifdef _MSC_VER
 	_Dcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
 			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
 			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
 		}
 	}
 	pCd(z) = zdotc;
 }
 #else
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DLARMM */
 /* Definition: */
 /* =========== */
 /*      DOUBLE PRECISION FUNCTION DLARMM( ANORM, BNORM, CNORM ) */
 /*      DOUBLE PRECISION   ANORM, BNORM, CNORM */
 /* >  \par Purpose: */
 /*  ======= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DLARMM returns a factor s in (0, 1] such that the linear updates */
 /* > */
 /* >    (s * C) - A * (s * B)  and  (s * C) - (s * A) * B */
 /* > */
 /* > cannot overflow, where A, B, and C are matrices of conforming */
 /* > dimensions. */
 /* > */
 /* > This is an auxiliary routine so there is no argument checking. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========= */
 /* > \param[in] ANORM */
 /* > \verbatim */
 /* >          ANORM is DOUBLE PRECISION */
 /* >          The infinity norm of A. ANORM >= 0. */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] BNORM */
 /* > \verbatim */
 /* >          BNORM is DOUBLE PRECISION */
 /* >          The infinity norm of B. BNORM >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] CNORM */
 /* > \verbatim */
 /* >          CNORM is DOUBLE PRECISION */
 /* >          The infinity norm of C. CNORM >= 0. */
 /* > \endverbatim */
 /* > */
 /* > */
 /*  ===================================================================== */
 /* >  References: */
 /* >    C. C. Kjelgaard Mikkelsen and L. Karlsson, Blocked Algorithms for */
 /* >    Robust Solution of Triangular Linear Systems. In: International */
 /* >    Conference on Parallel Processing and Applied Mathematics, pages */
 /* >    68--78. Springer, 2017. */
 /* > */
 /* > \ingroup OTHERauxiliary */
 /*  ===================================================================== */
 doublereal dlarmm_(doublereal *anorm, doublereal *bnorm, doublereal *cnorm)
 {
    /* System generated locals */
    doublereal ret_val;
    /* Local variables */
    extern doublereal dlamch_(char *);
    doublereal bignum, smlnum;
 /*     Determine machine dependent parameters to control overflow. */
    smlnum = dlamch_("Safe minimum") / dlamch_("Precision");
    bignum = 1. / smlnum / 4.;
 /*     Compute a scale factor. */
    ret_val = 1.;
    if (*bnorm <= 1.) {
 	if (*anorm * *bnorm > bignum - *cnorm) {
 	    ret_val = .5;
 	}
    } else {
 	if (*anorm > (bignum - *cnorm) / *bnorm) {
 	    ret_val = .5 / *bnorm;
 	}
    }
    return ret_val;
 /*     ==== End of DLARMM ==== */
 } /* dlarmm_ */
--- a/lapack-netlib/SRC/dlarmm.f
+++ b/lapack-netlib/SRC/dlarmm.f
@@ -0,0 +1,99 @@
 *> \brief \b DLARMM
 *
 * Definition:
 * ===========
 *
 *      DOUBLE PRECISION FUNCTION DLARMM( ANORM, BNORM, CNORM )
 *
 *     .. Scalar Arguments ..
 *      DOUBLE PRECISION   ANORM, BNORM, CNORM
 *     ..
 *
 *>  \par Purpose:
 *  =======
 *>
 *> \verbatim
 *>
 *> DLARMM returns a factor s in (0, 1] such that the linear updates
 *>
 *>    (s * C) - A * (s * B)  and  (s * C) - (s * A) * B
 *>
 *> cannot overflow, where A, B, and C are matrices of conforming
 *> dimensions.
 *>
 *> This is an auxiliary routine so there is no argument checking.
 *> \endverbatim
 *
 *  Arguments:
 *  =========
 *
 *> \param[in] ANORM
 *> \verbatim
 *>          ANORM is DOUBLE PRECISION
 *>          The infinity norm of A. ANORM >= 0.
 *>          The number of rows of the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] BNORM
 *> \verbatim
 *>          BNORM is DOUBLE PRECISION
 *>          The infinity norm of B. BNORM >= 0.
 *> \endverbatim
 *>
 *> \param[in] CNORM
 *> \verbatim
 *>          CNORM is DOUBLE PRECISION
 *>          The infinity norm of C. CNORM >= 0.
 *> \endverbatim
 *>
 *>
 *  =====================================================================
 *>  References:
 *>    C. C. Kjelgaard Mikkelsen and L. Karlsson, Blocked Algorithms for
 *>    Robust Solution of Triangular Linear Systems. In: International
 *>    Conference on Parallel Processing and Applied Mathematics, pages
 *>    68--78. Springer, 2017.
 *>
 *> \ingroup OTHERauxiliary
 *  =====================================================================
      DOUBLE PRECISION FUNCTION DLARMM( ANORM, BNORM, CNORM )
      IMPLICIT NONE
 *     .. Scalar Arguments ..
      DOUBLE PRECISION   ANORM, BNORM, CNORM
 *     .. Parameters ..
      DOUBLE PRECISION   ONE, HALF, FOUR
      PARAMETER          ( ONE = 1.0D0, HALF = 0.5D+0, FOUR = 4.0D0 )
 *     ..
 *     .. Local Scalars ..
       DOUBLE PRECISION   BIGNUM, SMLNUM
 *     ..
 *     .. External Functions ..
      DOUBLE PRECISION   DLAMCH
      EXTERNAL           DLAMCH
 *     ..
 *     .. Executable Statements ..
 *
 *
 *     Determine machine dependent parameters to control overflow.
 *
      SMLNUM = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
      BIGNUM = ( ONE / SMLNUM ) / FOUR
 *
 *     Compute a scale factor.
 *
      DLARMM = ONE
      IF( BNORM .LE. ONE ) THEN
         IF( ANORM * BNORM .GT. BIGNUM - CNORM ) THEN
            DLARMM = HALF
         END IF
      ELSE
         IF( ANORM .GT. (BIGNUM - CNORM) / BNORM ) THEN
            DLARMM = HALF / BNORM
         END IF
      END IF
      RETURN
 *
 *     ==== End of DLARMM ====
 *
      END
--- a/lapack-netlib/SRC/dlatrs3.c
+++ b/lapack-netlib/SRC/dlatrs3.c
--- a/lapack-netlib/SRC/dlatrs3.f
+++ b/lapack-netlib/SRC/dlatrs3.f
@@ -0,0 +1,656 @@
 *> \brief \b DLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow.
 *
 *  Definition:
 *  ===========
 *
 *      SUBROUTINE DLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
 *                          X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          DIAG, NORMIN, TRANS, UPLO
 *       INTEGER            INFO, LDA, LWORK, LDX, N, NRHS
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), CNORM( * ), SCALE( * ), 
 *                          WORK( * ), X( LDX, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLATRS3 solves one of the triangular systems
 *>
 *>    A * X = B * diag(scale)  or  A**T * X = B * diag(scale)
 *>
 *> with scaling to prevent overflow.  Here A is an upper or lower
 *> triangular matrix, A**T denotes the transpose of A. X and B are
 *> n by nrhs matrices and scale is an nrhs element vector of scaling
 *> factors. A scaling factor scale(j) is usually less than or equal
 *> to 1, chosen such that X(:,j) is less than the overflow threshold.
 *> If the matrix A is singular (A(j,j) = 0 for some j), then
 *> a non-trivial solution to A*X = 0 is returned. If the system is
 *> so badly scaled that the solution cannot be represented as
 *> (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned.
 *>
 *> This is a BLAS-3 version of LATRS for solving several right
 *> hand sides simultaneously.
 *>
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] UPLO
 *> \verbatim
 *>          UPLO is CHARACTER*1
 *>          Specifies whether the matrix A is upper or lower triangular.
 *>          = 'U':  Upper triangular
 *>          = 'L':  Lower triangular
 *> \endverbatim
 *>
 *> \param[in] TRANS
 *> \verbatim
 *>          TRANS is CHARACTER*1
 *>          Specifies the operation applied to A.
 *>          = 'N':  Solve A * x = s*b  (No transpose)
 *>          = 'T':  Solve A**T* x = s*b  (Transpose)
 *>          = 'C':  Solve A**T* x = s*b  (Conjugate transpose = Transpose)
 *> \endverbatim
 *>
 *> \param[in] DIAG
 *> \verbatim
 *>          DIAG is CHARACTER*1
 *>          Specifies whether or not the matrix A is unit triangular.
 *>          = 'N':  Non-unit triangular
 *>          = 'U':  Unit triangular
 *> \endverbatim
 *>
 *> \param[in] NORMIN
 *> \verbatim
 *>          NORMIN is CHARACTER*1
 *>          Specifies whether CNORM has been set or not.
 *>          = 'Y':  CNORM contains the column norms on entry
 *>          = 'N':  CNORM is not set on entry.  On exit, the norms will
 *>                  be computed and stored in CNORM.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The order of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] NRHS
 *> \verbatim
 *>          NRHS is INTEGER
 *>          The number of columns of X.  NRHS >= 0.
 *> \endverbatim
 *>
 *> \param[in] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          The triangular matrix A.  If UPLO = 'U', the leading n by n
 *>          upper triangular part of the array A contains the upper
 *>          triangular matrix, and the strictly lower triangular part of
 *>          A is not referenced.  If UPLO = 'L', the leading n by n lower
 *>          triangular part of the array A contains the lower triangular
 *>          matrix, and the strictly upper triangular part of A is not
 *>          referenced.  If DIAG = 'U', the diagonal elements of A are
 *>          also not referenced and are assumed to be 1.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max (1,N).
 *> \endverbatim
 *>
 *> \param[in,out] X
 *> \verbatim
 *>          X is DOUBLE PRECISION array, dimension (LDX,NRHS)
 *>          On entry, the right hand side B of the triangular system.
 *>          On exit, X is overwritten by the solution matrix X.
 *> \endverbatim
 *>
 *> \param[in] LDX
 *> \verbatim
 *>          LDX is INTEGER
 *>          The leading dimension of the array X.  LDX >= max (1,N).
 *> \endverbatim
 *>
 *> \param[out] SCALE
 *> \verbatim
 *>          SCALE is DOUBLE PRECISION array, dimension (NRHS)
 *>          The scaling factor s(k) is for the triangular system
 *>          A * x(:,k) = s(k)*b(:,k)  or  A**T* x(:,k) = s(k)*b(:,k).
 *>          If SCALE = 0, the matrix A is singular or badly scaled.
 *>          If A(j,j) = 0 is encountered, a non-trivial vector x(:,k)
 *>          that is an exact or approximate solution to A*x(:,k) = 0
 *>          is returned. If the system so badly scaled that solution
 *>          cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0
 *>          is returned.
 *> \endverbatim
 *>
 *> \param[in,out] CNORM
 *> \verbatim
 *>          CNORM is DOUBLE PRECISION array, dimension (N)
 *>
 *>          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
 *>          contains the norm of the off-diagonal part of the j-th column
 *>          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
 *>          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
 *>          must be greater than or equal to the 1-norm.
 *>
 *>          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
 *>          returns the 1-norm of the offdiagonal part of the j-th column
 *>          of A.
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array, dimension (LWORK).
 *>          On exit, if INFO = 0, WORK(1) returns the optimal size of
 *>          WORK.
 *> \endverbatim
 *>
 *> \param[in] LWORK
 *>          LWORK is INTEGER
 *>          LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where
 *>          NBA = (N + NB - 1)/NB and NB is the optimal block size.
 *>
 *>          If LWORK = -1, then a workspace query is assumed; the routine
 *>          only calculates the optimal dimensions of the WORK array, returns
 *>          this value as the first entry of the WORK array, and no error
 *>          message related to LWORK is issued by XERBLA.
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -k, the k-th argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleOTHERauxiliary
 *> \par Further Details:
 *  =====================
 *  \verbatim
 *  The algorithm follows the structure of a block triangular solve.
 *  The diagonal block is solved with a call to the robust the triangular
 *  solver LATRS for every right-hand side RHS = 1, ..., NRHS
 *     op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ),
 *  where op( A ) = A or op( A ) = A**T.
 *  The linear block updates operate on block columns of X,
 *     B( I, K ) - op(A( I, J )) * X( J, K )
 *  and use GEMM. To avoid overflow in the linear block update, the worst case
 *  growth is estimated. For every RHS, a scale factor s <= 1.0 is computed
 *  such that
 *     || s * B( I, RHS )||_oo
 *   + || op(A( I, J )) ||_oo * || s *  X( J, RHS ) ||_oo <= Overflow threshold
 *
 *  Once all columns of a block column have been rescaled (BLAS-1), the linear
 *  update is executed with GEMM without overflow.
 *
 *  To limit rescaling, local scale factors track the scaling of column segments.
 *  There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA
 *  per right-hand side column RHS = 1, ..., NRHS. The global scale factor
 *  SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS )
 *  I = 1, ..., NBA.
 *  A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS )
 *  updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The
 *  linear update of potentially inconsistently scaled vector segments
 *     s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) )
 *  computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and,
 *  if necessary, rescales the blocks prior to calling GEMM.
 *
 *  \endverbatim
 *  =====================================================================
 *  References:
 *  C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019).
 *  Parallel robust solution of triangular linear systems. Concurrency
 *  and Computation: Practice and Experience, 31(19), e5064.
 *
 *  Contributor:
 *   Angelika Schwarz, Umea University, Sweden.
 *
 *  =====================================================================
      SUBROUTINE DLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
     $                    X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
      IMPLICIT NONE
 *
 *     .. Scalar Arguments ..
      CHARACTER          DIAG, TRANS, NORMIN, UPLO
      INTEGER            INFO, LDA, LWORK, LDX, N, NRHS
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), CNORM( * ), X( LDX, * ),
     $                   SCALE( * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE
      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
      INTEGER            NBMAX, NBMIN, NBRHS, NRHSMIN
      PARAMETER          ( NRHSMIN = 2, NBRHS = 32 )
      PARAMETER          ( NBMIN = 8, NBMAX = 64 )
 *     ..
 *     .. Local Arrays ..
      DOUBLE PRECISION   W( NBMAX ), XNRM( NBRHS )
 *     ..
 *     .. Local Scalars ..
      LOGICAL            LQUERY, NOTRAN, NOUNIT, UPPER
      INTEGER            AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J,
     $                   JFIRST, JINC, JLAST, J1, J2, K, KK, K1, K2,
     $                   LANRM, LDS, LSCALE, NB, NBA, NBX, RHS
      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC,
     $                   SCAMIN, SMLNUM, TMAX
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ILAENV
      DOUBLE PRECISION   DLAMCH, DLANGE, DLARMM
      EXTERNAL           DLAMCH, DLANGE, DLARMM, ILAENV, LSAME
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLATRS, DSCAL, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
      INFO = 0
      UPPER = LSAME( UPLO, 'U' )
      NOTRAN = LSAME( TRANS, 'N' )
      NOUNIT = LSAME( DIAG, 'N' )
      LQUERY = ( LWORK.EQ.-1 )
 *
 *     Partition A and X into blocks
 *
      NB = MAX( 8, ILAENV( 1, 'DLATRS', '', N, N, -1, -1 ) )
      NB = MIN( NBMAX, NB )
      NBA = MAX( 1, (N + NB - 1) / NB )
      NBX = MAX( 1, (NRHS + NBRHS - 1) / NBRHS )
 *
 *     Compute the workspace
 *
 *     The workspace comprises two parts.
 *     The first part stores the local scale factors. Each simultaneously
 *     computed right-hand side requires one local scale factor per block
 *     row. WORK( I+KK*LDS ) is the scale factor of the vector
 *     segment associated with the I-th block row and the KK-th vector
 *     in the block column.
      LSCALE = NBA * MAX( NBA, MIN( NRHS, NBRHS ) )
      LDS = NBA
 *     The second part stores upper bounds of the triangular A. There are
 *     a total of NBA x NBA blocks, of which only the upper triangular
 *     part or the lower triangular part is referenced. The upper bound of
 *     the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ).
      LANRM = NBA * NBA
      AWRK = LSCALE
      WORK( 1 ) = LSCALE + LANRM
 *
 *     Test the input parameters
 *
      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
         INFO = -1
      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
     $         LSAME( TRANS, 'C' ) ) THEN
         INFO = -2
      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
         INFO = -3
      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
     $         LSAME( NORMIN, 'N' ) ) THEN
         INFO = -4
      ELSE IF( N.LT.0 ) THEN
         INFO = -5
      ELSE IF( NRHS.LT.0 ) THEN
         INFO = -6
      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
         INFO = -8
      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
         INFO = -10
      ELSE IF( .NOT.LQUERY .AND. LWORK.LT.WORK( 1 ) ) THEN
         INFO = -14
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DLATRS3', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
 *
 *     Initialize scaling factors
 *
      DO KK = 1, NRHS
         SCALE( KK ) = ONE
      END DO
 *
 *     Quick return if possible
 *
      IF( MIN( N, NRHS ).EQ.0 )
     $   RETURN
 *
 *     Determine machine dependent constant to control overflow.
 *
      BIGNUM = DLAMCH( 'Overflow' )
      SMLNUM = DLAMCH( 'Safe Minimum' )
 *
 *     Use unblocked code for small problems
 *
      IF( NRHS.LT.NRHSMIN ) THEN
         CALL DLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X( 1, 1),
     $                SCALE( 1 ), CNORM, INFO )
         DO K = 2, NRHS
            CALL DLATRS( UPLO, TRANS, DIAG, 'Y', N, A, LDA, X( 1, K ),
     $                   SCALE( K ), CNORM, INFO )
         END DO
         RETURN
      END IF
 *
 *     Compute norms of blocks of A excluding diagonal blocks and find
 *     the block with the largest norm TMAX.
 *
      TMAX = ZERO
      DO J = 1, NBA
         J1 = (J-1)*NB + 1
         J2 = MIN( J*NB, N ) + 1
         IF ( UPPER ) THEN
            IFIRST = 1
            ILAST = J - 1
         ELSE
            IFIRST = J + 1
            ILAST = NBA
         END IF
         DO I = IFIRST, ILAST
            I1 = (I-1)*NB + 1
            I2 = MIN( I*NB, N ) + 1
 *
 *           Compute upper bound of A( I1:I2-1, J1:J2-1 ).
 *
            IF( NOTRAN ) THEN
               ANRM = DLANGE( 'I', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
               WORK( AWRK + I+(J-1)*NBA ) = ANRM
            ELSE
               ANRM = DLANGE( '1', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
               WORK( AWRK + J+(I-1)*NBA ) = ANRM
            END IF
            TMAX = MAX( TMAX, ANRM )
         END DO
      END DO
 *
      IF( .NOT. TMAX.LE.DLAMCH('Overflow') ) THEN
 *
 *        Some matrix entries have huge absolute value. At least one upper
 *        bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point
 *        number, either due to overflow in LANGE or due to Inf in A.
 *        Fall back to LATRS. Set normin = 'N' for every right-hand side to
 *        force computation of TSCAL in LATRS to avoid the likely overflow
 *        in the computation of the column norms CNORM.
 *
         DO K = 1, NRHS
            CALL DLATRS( UPLO, TRANS, DIAG, 'N', N, A, LDA, X( 1, K ),
     $                   SCALE( K ), CNORM, INFO )
         END DO
         RETURN
      END IF
 *
 *     Every right-hand side requires workspace to store NBA local scale
 *     factors. To save workspace, X is computed successively in block columns
 *     of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient
 *     workspace is available, larger values of NBRHS or NBRHS = NRHS are viable.
      DO K = 1, NBX
 *        Loop over block columns (index = K) of X and, for column-wise scalings,
 *        over individual columns (index = KK).
 *        K1: column index of the first column in X( J, K )
 *        K2: column index of the first column in X( J, K+1 )
 *        so the K2 - K1 is the column count of the block X( J, K )
         K1 = (K-1)*NBRHS + 1
         K2 = MIN( K*NBRHS, NRHS ) + 1
 *
 *        Initialize local scaling factors of current block column X( J, K )
 *
         DO KK = 1, K2-K1
            DO I = 1, NBA
               WORK( I+KK*LDS ) = ONE
            END DO
         END DO
 *
         IF( NOTRAN ) THEN
 *
 *           Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
 *
            IF( UPPER ) THEN
               JFIRST = NBA
               JLAST = 1
               JINC = -1
            ELSE
               JFIRST = 1
               JLAST = NBA
               JINC = 1
            END IF
         ELSE
 *
 *           Solve A**T * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
 *
            IF( UPPER ) THEN
               JFIRST = 1
               JLAST = NBA
               JINC = 1
            ELSE
               JFIRST = NBA
               JLAST = 1
               JINC = -1
            END IF
         END IF
 *
         DO J = JFIRST, JLAST, JINC
 *           J1: row index of the first row in A( J, J )
 *           J2: row index of the first row in A( J+1, J+1 )
 *           so that J2 - J1 is the row count of the block A( J, J )
            J1 = (J-1)*NB + 1
            J2 = MIN( J*NB, N ) + 1
 *
 *           Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS )
 *           for all right-hand sides in the current block column,
 *           one RHS at a time.
 *
            DO KK = 1, K2-K1
               RHS = K1 + KK - 1
               IF( KK.EQ.1 ) THEN
                  CALL DLATRS( UPLO, TRANS, DIAG, 'N', J2-J1,
     $                         A( J1, J1 ), LDA, X( J1, RHS ),
     $                         SCALOC, CNORM, INFO )
               ELSE
                  CALL DLATRS( UPLO, TRANS, DIAG, 'Y', J2-J1,
     $                         A( J1, J1 ), LDA, X( J1, RHS ),
     $                         SCALOC, CNORM, INFO )
               END IF
 *              Find largest absolute value entry in the vector segment
 *              X( J1:J2-1, RHS ) as an upper bound for the worst case
 *              growth in the linear updates.
               XNRM( KK ) = DLANGE( 'I', J2-J1, 1, X( J1, RHS ),
     $                              LDX, W )
 *
               IF( SCALOC .EQ. ZERO ) THEN
 *                 LATRS found that A is singular through A(j,j) = 0.
 *                 Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0
 *                 and compute A*x = 0 (or A**T*x = 0). Note that
 *                 X(J1:J2-1, KK) is set by LATRS.
                  SCALE( RHS ) = ZERO
                  DO II = 1, J1-1
                     X( II, KK ) = ZERO
                  END DO
                  DO II = J2, N
                     X( II, KK ) = ZERO
                  END DO
 *                 Discard the local scale factors.
                  DO II = 1, NBA
                     WORK( II+KK*LDS ) = ONE
                  END DO
                  SCALOC = ONE
               ELSE IF( SCALOC * WORK( J+KK*LDS ) .EQ. ZERO ) THEN
 *                 LATRS computed a valid scale factor, but combined with
 *                 the current scaling the solution does not have a
 *                 scale factor > 0.
 *
 *                 Set WORK( J+KK*LDS ) to smallest valid scale
 *                 factor and increase SCALOC accordingly.
                  SCAL = WORK( J+KK*LDS ) / SMLNUM
                  SCALOC = SCALOC * SCAL
                  WORK( J+KK*LDS ) = SMLNUM
 *                 If LATRS overestimated the growth, x may be
 *                 rescaled to preserve a valid combined scale
 *                 factor WORK( J, KK ) > 0.
                  RSCAL = ONE / SCALOC
                  IF( XNRM( KK ) * RSCAL .LE. BIGNUM ) THEN
                     XNRM( KK ) = XNRM( KK ) * RSCAL
                     CALL DSCAL( J2-J1, RSCAL, X( J1, RHS ), 1 )
                     SCALOC = ONE
                  ELSE
 *                    The system op(A) * x = b is badly scaled and its
 *                    solution cannot be represented as (1/scale) * x.
 *                    Set x to zero. This approach deviates from LATRS
 *                    where a completely meaningless non-zero vector
 *                    is returned that is not a solution to op(A) * x = b.
                     SCALE( RHS ) = ZERO
                     DO II = 1, N
                        X( II, KK ) = ZERO
                     END DO
 *                    Discard the local scale factors.
                     DO II = 1, NBA
                        WORK( II+KK*LDS ) = ONE
                     END DO
                     SCALOC = ONE
                  END IF
               END IF
               SCALOC = SCALOC * WORK( J+KK*LDS )
               WORK( J+KK*LDS ) = SCALOC
            END DO
 *
 *           Linear block updates
 *
            IF( NOTRAN ) THEN
               IF( UPPER ) THEN
                  IFIRST = J - 1
                  ILAST = 1
                  IINC = -1
               ELSE
                  IFIRST = J + 1
                  ILAST = NBA
                  IINC = 1
               END IF
            ELSE
               IF( UPPER ) THEN
                  IFIRST = J + 1
                  ILAST = NBA
                  IINC = 1
               ELSE
                  IFIRST = J - 1
                  ILAST = 1
                  IINC = -1
               END IF
            END IF
 *
            DO I = IFIRST, ILAST, IINC
 *              I1: row index of the first column in X( I, K )
 *              I2: row index of the first column in X( I+1, K )
 *              so the I2 - I1 is the row count of the block X( I, K )
               I1 = (I-1)*NB + 1
               I2 = MIN( I*NB, N ) + 1
 *
 *              Prepare the linear update to be executed with GEMM.
 *              For each column, compute a consistent scaling, a
 *              scaling factor to survive the linear update, and
 *              rescale the column segments, if necesssary. Then
 *              the linear update is safely executed.
 *
               DO KK = 1, K2-K1
                  RHS = K1 + KK - 1
 *                 Compute consistent scaling
                  SCAMIN = MIN( WORK( I + KK*LDS), WORK( J + KK*LDS ) )
 *
 *                 Compute scaling factor to survive the linear update
 *                 simulating consistent scaling.
 *
                  BNRM = DLANGE( 'I', I2-I1, 1, X( I1, RHS ), LDX, W )
                  BNRM = BNRM*( SCAMIN / WORK( I+KK*LDS ) )
                  XNRM( KK ) = XNRM( KK )*(SCAMIN / WORK( J+KK*LDS ))
                  ANRM = WORK( AWRK + I+(J-1)*NBA )
                  SCALOC = DLARMM( ANRM, XNRM( KK ), BNRM )
 *
 *                 Simultaneously apply the robust update factor and the
 *                 consistency scaling factor to B( I, KK ) and B( J, KK ).
 *
                  SCAL = ( SCAMIN / WORK( I+KK*LDS) )*SCALOC
                  IF( SCAL.NE.ONE ) THEN
                     CALL DSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
                     WORK( I+KK*LDS ) = SCAMIN*SCALOC
                  END IF
 *
                  SCAL = ( SCAMIN / WORK( J+KK*LDS ) )*SCALOC
                  IF( SCAL.NE.ONE ) THEN
                     CALL DSCAL( J2-J1, SCAL, X( J1, RHS ), 1 )
                     WORK( J+KK*LDS ) = SCAMIN*SCALOC
                  END IF
               END DO
 *
               IF( NOTRAN ) THEN
 *
 *                 B( I, K ) := B( I, K ) - A( I, J ) * X( J, K )
 *
                  CALL DGEMM( 'N', 'N', I2-I1, K2-K1, J2-J1, -ONE,
     $                        A( I1, J1 ), LDA, X( J1, K1 ), LDX,
     $                        ONE, X( I1, K1 ), LDX )
               ELSE
 *
 *                 B( I, K ) := B( I, K ) - A( J, I )**T * X( J, K )
 *
                  CALL DGEMM( 'T', 'N', I2-I1, K2-K1, J2-J1, -ONE,
     $                        A( J1, I1 ), LDA, X( J1, K1 ), LDX,
     $                        ONE, X( I1, K1 ), LDX )
               END IF
            END DO
         END DO
 *
 *        Reduce local scaling factors
 *
         DO KK = 1, K2-K1
            RHS = K1 + KK - 1
            DO I = 1, NBA
               SCALE( RHS ) = MIN( SCALE( RHS ), WORK( I+KK*LDS ) )
            END DO
         END DO
 *
 *        Realize consistent scaling
 *
         DO KK = 1, K2-K1
            RHS = K1 + KK - 1
            IF( SCALE( RHS ).NE.ONE .AND. SCALE( RHS ).NE. ZERO ) THEN
               DO I = 1, NBA
                  I1 = (I-1)*NB + 1
                  I2 = MIN( I*NB, N ) + 1
                  SCAL = SCALE( RHS ) / WORK( I+KK*LDS )
                  IF( SCAL.NE.ONE )
     $               CALL DSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
               END DO
            END IF
         END DO
      END DO
      RETURN
 *
 *     End of DLATRS3
 *
      END
--- a/lapack-netlib/SRC/dtrsyl3.c
+++ b/lapack-netlib/SRC/dtrsyl3.c
--- a/lapack-netlib/SRC/dtrsyl3.f
+++ b/lapack-netlib/SRC/dtrsyl3.f
--- a/lapack-netlib/SRC/ilaenv.f
+++ b/lapack-netlib/SRC/ilaenv.f
@@ -469,6 +469,15 @@
            ELSE
               NB = 64
            END IF
         ELSE IF( C3.EQ.'SYL' ) THEN
 *           The upper bound is to prevent overly aggressive scaling.
            IF( SNAME ) THEN
               NB = MIN( MAX( 48, INT( ( MIN( N1, N2 ) * 16 ) / 100) ),
     $                   240 )
            ELSE
               NB = MIN( MAX( 24, INT( ( MIN( N1, N2 ) * 8 ) / 100) ),
     $                   80 )
            END IF
         END IF
      ELSE IF( C2.EQ.'LA' ) THEN
         IF( C3.EQ.'UUM' ) THEN
@@ -477,6 +486,12 @@
            ELSE
               NB = 64
            END IF
         ELSE IF( C3.EQ.'TRS' ) THEN
            IF( SNAME ) THEN
               NB = 32
            ELSE
               NB = 32
            END IF
         END IF
      ELSE IF( SNAME .AND. C2.EQ.'ST' ) THEN
         IF( C3.EQ.'EBZ' ) THEN
--- a/lapack-netlib/SRC/slarmm.c
+++ b/lapack-netlib/SRC/slarmm.c
@@ -0,0 +1,605 @@
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 #if defined(_WIN64)
 typedef long long BLASLONG;
 typedef unsigned long long BLASULONG;
 #else
 typedef long BLASLONG;
 typedef unsigned long BLASULONG;
 #endif
 #ifdef LAPACK_ILP64
 typedef BLASLONG blasint;
 #if defined(_WIN64)
 #define blasabs(x) llabs(x)
 #else
 #define blasabs(x) labs(x)
 #endif
 #else
 typedef int blasint;
 #define blasabs(x) abs(x)
 #endif
 typedef blasint integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 #ifdef _MSC_VER
 static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
 static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
 static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
 static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
 #else
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #endif
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #ifdef _MSC_VER
 #define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
 #define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
 #else
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #endif
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimagf(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle_() continue;
 #define myceiling_(w) {ceil(w)}
 #define myhuge_(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
 #define myexp_(w) my_expfunc(w)
 static int my_expfunc(float *x) {int e; (void)frexpf(*x,&e); return e;}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 #ifdef _MSC_VER
 static _Fcomplex cpow_ui(complex x, integer n) {
 	complex pow={1.0,0.0}; unsigned long int u;
 		if(n != 0) {
 		if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
 		for(u = n; ; ) {
 			if(u & 01) pow.r *= x.r, pow.i *= x.i;
 			if(u >>= 1) x.r *= x.r, x.i *= x.i;
 			else break;
 		}
 	}
 	_Fcomplex p={pow.r, pow.i};
 	return p;
 }
 #else
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 #endif
 #ifdef _MSC_VER
 static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
 	_Dcomplex pow={1.0,0.0}; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
 		for(u = n; ; ) {
 			if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
 			if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
 			else break;
 		}
 	}
 	_Dcomplex p = {pow._Val[0], pow._Val[1]};
 	return p;
 }
 #else
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 #endif
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 #ifdef _MSC_VER
 	_Fcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
 			zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
 			zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
 		}
 	}
 	pCf(z) = zdotc;
 }
 #else
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 #endif
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 #ifdef _MSC_VER
 	_Dcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
 			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
 			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
 		}
 	}
 	pCd(z) = zdotc;
 }
 #else
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 #ifdef _MSC_VER
 	_Fcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
 			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
 			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
 		}
 	}
 	pCf(z) = zdotc;
 }
 #else
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 #endif
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 #ifdef _MSC_VER
 	_Dcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
 			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
 			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
 		}
 	}
 	pCd(z) = zdotc;
 }
 #else
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b SLARMM */
 /* Definition: */
 /* =========== */
 /*      REAL FUNCTION SLARMM( ANORM, BNORM, CNORM ) */
 /*      REAL               ANORM, BNORM, CNORM */
 /* >  \par Purpose: */
 /*  ======= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SLARMM returns a factor s in (0, 1] such that the linear updates */
 /* > */
 /* >    (s * C) - A * (s * B)  and  (s * C) - (s * A) * B */
 /* > */
 /* > cannot overflow, where A, B, and C are matrices of conforming */
 /* > dimensions. */
 /* > */
 /* > This is an auxiliary routine so there is no argument checking. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========= */
 /* > \param[in] ANORM */
 /* > \verbatim */
 /* >          ANORM is REAL */
 /* >          The infinity norm of A. ANORM >= 0. */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] BNORM */
 /* > \verbatim */
 /* >          BNORM is REAL */
 /* >          The infinity norm of B. BNORM >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] CNORM */
 /* > \verbatim */
 /* >          CNORM is REAL */
 /* >          The infinity norm of C. CNORM >= 0. */
 /* > \endverbatim */
 /* > */
 /* > */
 /*  ===================================================================== */
 /* >  References: */
 /* >    C. C. Kjelgaard Mikkelsen and L. Karlsson, Blocked Algorithms for */
 /* >    Robust Solution of Triangular Linear Systems. In: International */
 /* >    Conference on Parallel Processing and Applied Mathematics, pages */
 /* >    68--78. Springer, 2017. */
 /* > */
 /* > \ingroup OTHERauxiliary */
 /*  ===================================================================== */
 real slarmm_(real *anorm, real *bnorm, real *cnorm)
 {
    /* System generated locals */
    real ret_val;
    /* Local variables */
    extern real slamch_(char *);
    real bignum, smlnum;
 /*     Determine machine dependent parameters to control overflow. */
    smlnum = slamch_("Safe minimum") / slamch_("Precision");
    bignum = 1.f / smlnum / 4.f;
 /*     Compute a scale factor. */
    ret_val = 1.f;
    if (*bnorm <= 1.f) {
 	if (*anorm * *bnorm > bignum - *cnorm) {
 	    ret_val = .5f;
 	}
    } else {
 	if (*anorm > (bignum - *cnorm) / *bnorm) {
 	    ret_val = .5f / *bnorm;
 	}
    }
    return ret_val;
 /*     ==== End of SLARMM ==== */
 } /* slarmm_ */
--- a/lapack-netlib/SRC/slarmm.f
+++ b/lapack-netlib/SRC/slarmm.f
@@ -0,0 +1,99 @@
 *> \brief \b SLARMM
 *
 * Definition:
 * ===========
 *
 *      REAL FUNCTION SLARMM( ANORM, BNORM, CNORM )
 *
 *     .. Scalar Arguments ..
 *      REAL               ANORM, BNORM, CNORM
 *     ..
 *
 *>  \par Purpose:
 *  =======
 *>
 *> \verbatim
 *>
 *> SLARMM returns a factor s in (0, 1] such that the linear updates
 *>
 *>    (s * C) - A * (s * B)  and  (s * C) - (s * A) * B
 *>
 *> cannot overflow, where A, B, and C are matrices of conforming
 *> dimensions.
 *>
 *> This is an auxiliary routine so there is no argument checking.
 *> \endverbatim
 *
 *  Arguments:
 *  =========
 *
 *> \param[in] ANORM
 *> \verbatim
 *>          ANORM is REAL
 *>          The infinity norm of A. ANORM >= 0.
 *>          The number of rows of the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] BNORM
 *> \verbatim
 *>          BNORM is REAL
 *>          The infinity norm of B. BNORM >= 0.
 *> \endverbatim
 *>
 *> \param[in] CNORM
 *> \verbatim
 *>          CNORM is REAL
 *>          The infinity norm of C. CNORM >= 0.
 *> \endverbatim
 *>
 *>
 *  =====================================================================
 *>  References:
 *>    C. C. Kjelgaard Mikkelsen and L. Karlsson, Blocked Algorithms for
 *>    Robust Solution of Triangular Linear Systems. In: International
 *>    Conference on Parallel Processing and Applied Mathematics, pages
 *>    68--78. Springer, 2017.
 *>
 *> \ingroup OTHERauxiliary
 *  =====================================================================
      REAL FUNCTION SLARMM( ANORM, BNORM, CNORM )
      IMPLICIT NONE
 *     .. Scalar Arguments ..
      REAL               ANORM, BNORM, CNORM
 *     .. Parameters ..
      REAL               ONE, HALF, FOUR
      PARAMETER          ( ONE = 1.0E0, HALF = 0.5E+0, FOUR = 4.0E+0 )
 *     ..
 *     .. Local Scalars ..
      REAL               BIGNUM, SMLNUM
 *     ..
 *     .. External Functions ..
      REAL               SLAMCH
      EXTERNAL           SLAMCH
 *     ..
 *     .. Executable Statements ..
 *
 *
 *     Determine machine dependent parameters to control overflow.
 *
      SMLNUM = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
      BIGNUM = ( ONE / SMLNUM ) / FOUR
 *
 *     Compute a scale factor.
 *
      SLARMM = ONE
      IF( BNORM .LE. ONE ) THEN
         IF( ANORM * BNORM .GT. BIGNUM - CNORM ) THEN
            SLARMM = HALF
         END IF
      ELSE
         IF( ANORM .GT. (BIGNUM - CNORM) / BNORM ) THEN
            SLARMM = HALF / BNORM
         END IF
      END IF
      RETURN
 *
 *     ==== End of SLARMM ====
 *
      END
--- a/lapack-netlib/SRC/slatrs3.c
+++ b/lapack-netlib/SRC/slatrs3.c
--- a/lapack-netlib/SRC/slatrs3.f
+++ b/lapack-netlib/SRC/slatrs3.f
@@ -0,0 +1,656 @@
 *> \brief \b SLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow.
 *
 *  Definition:
 *  ===========
 *
 *      SUBROUTINE SLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
 *                          X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          DIAG, NORMIN, TRANS, UPLO
 *       INTEGER            INFO, LDA, LWORK, LDX, N, NRHS
 *       ..
 *       .. Array Arguments ..
 *       REAL               A( LDA, * ), CNORM( * ), SCALE( * ), 
 *                          WORK( * ), X( LDX, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> SLATRS3 solves one of the triangular systems
 *>
 *>    A * X = B * diag(scale)  or  A**T * X = B * diag(scale)
 *>
 *> with scaling to prevent overflow.  Here A is an upper or lower
 *> triangular matrix, A**T denotes the transpose of A. X and B are
 *> n by nrhs matrices and scale is an nrhs element vector of scaling
 *> factors. A scaling factor scale(j) is usually less than or equal
 *> to 1, chosen such that X(:,j) is less than the overflow threshold.
 *> If the matrix A is singular (A(j,j) = 0 for some j), then
 *> a non-trivial solution to A*X = 0 is returned. If the system is
 *> so badly scaled that the solution cannot be represented as
 *> (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned.
 *>
 *> This is a BLAS-3 version of LATRS for solving several right
 *> hand sides simultaneously.
 *>
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] UPLO
 *> \verbatim
 *>          UPLO is CHARACTER*1
 *>          Specifies whether the matrix A is upper or lower triangular.
 *>          = 'U':  Upper triangular
 *>          = 'L':  Lower triangular
 *> \endverbatim
 *>
 *> \param[in] TRANS
 *> \verbatim
 *>          TRANS is CHARACTER*1
 *>          Specifies the operation applied to A.
 *>          = 'N':  Solve A * x = s*b  (No transpose)
 *>          = 'T':  Solve A**T* x = s*b  (Transpose)
 *>          = 'C':  Solve A**T* x = s*b  (Conjugate transpose = Transpose)
 *> \endverbatim
 *>
 *> \param[in] DIAG
 *> \verbatim
 *>          DIAG is CHARACTER*1
 *>          Specifies whether or not the matrix A is unit triangular.
 *>          = 'N':  Non-unit triangular
 *>          = 'U':  Unit triangular
 *> \endverbatim
 *>
 *> \param[in] NORMIN
 *> \verbatim
 *>          NORMIN is CHARACTER*1
 *>          Specifies whether CNORM has been set or not.
 *>          = 'Y':  CNORM contains the column norms on entry
 *>          = 'N':  CNORM is not set on entry.  On exit, the norms will
 *>                  be computed and stored in CNORM.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The order of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] NRHS
 *> \verbatim
 *>          NRHS is INTEGER
 *>          The number of columns of X.  NRHS >= 0.
 *> \endverbatim
 *>
 *> \param[in] A
 *> \verbatim
 *>          A is REAL array, dimension (LDA,N)
 *>          The triangular matrix A.  If UPLO = 'U', the leading n by n
 *>          upper triangular part of the array A contains the upper
 *>          triangular matrix, and the strictly lower triangular part of
 *>          A is not referenced.  If UPLO = 'L', the leading n by n lower
 *>          triangular part of the array A contains the lower triangular
 *>          matrix, and the strictly upper triangular part of A is not
 *>          referenced.  If DIAG = 'U', the diagonal elements of A are
 *>          also not referenced and are assumed to be 1.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max (1,N).
 *> \endverbatim
 *>
 *> \param[in,out] X
 *> \verbatim
 *>          X is REAL array, dimension (LDX,NRHS)
 *>          On entry, the right hand side B of the triangular system.
 *>          On exit, X is overwritten by the solution matrix X.
 *> \endverbatim
 *>
 *> \param[in] LDX
 *> \verbatim
 *>          LDX is INTEGER
 *>          The leading dimension of the array X.  LDX >= max (1,N).
 *> \endverbatim
 *>
 *> \param[out] SCALE
 *> \verbatim
 *>          SCALE is REAL array, dimension (NRHS)
 *>          The scaling factor s(k) is for the triangular system
 *>          A * x(:,k) = s(k)*b(:,k)  or  A**T* x(:,k) = s(k)*b(:,k).
 *>          If SCALE = 0, the matrix A is singular or badly scaled.
 *>          If A(j,j) = 0 is encountered, a non-trivial vector x(:,k)
 *>          that is an exact or approximate solution to A*x(:,k) = 0
 *>          is returned. If the system so badly scaled that solution
 *>          cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0
 *>          is returned.
 *> \endverbatim
 *>
 *> \param[in,out] CNORM
 *> \verbatim
 *>          CNORM is REAL array, dimension (N)
 *>
 *>          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
 *>          contains the norm of the off-diagonal part of the j-th column
 *>          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
 *>          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
 *>          must be greater than or equal to the 1-norm.
 *>
 *>          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
 *>          returns the 1-norm of the offdiagonal part of the j-th column
 *>          of A.
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is REAL array, dimension (LWORK).
 *>          On exit, if INFO = 0, WORK(1) returns the optimal size of
 *>          WORK.
 *> \endverbatim
 *>
 *> \param[in] LWORK
 *>          LWORK is INTEGER
 *>          LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where
 *>          NBA = (N + NB - 1)/NB and NB is the optimal block size.
 *>
 *>          If LWORK = -1, then a workspace query is assumed; the routine
 *>          only calculates the optimal dimensions of the WORK array, returns
 *>          this value as the first entry of the WORK array, and no error
 *>          message related to LWORK is issued by XERBLA.
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -k, the k-th argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleOTHERauxiliary
 *> \par Further Details:
 *  =====================
 *  \verbatim
 *  The algorithm follows the structure of a block triangular solve.
 *  The diagonal block is solved with a call to the robust the triangular
 *  solver LATRS for every right-hand side RHS = 1, ..., NRHS
 *     op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ),
 *  where op( A ) = A or op( A ) = A**T.
 *  The linear block updates operate on block columns of X,
 *     B( I, K ) - op(A( I, J )) * X( J, K )
 *  and use GEMM. To avoid overflow in the linear block update, the worst case
 *  growth is estimated. For every RHS, a scale factor s <= 1.0 is computed
 *  such that
 *     || s * B( I, RHS )||_oo
 *   + || op(A( I, J )) ||_oo * || s *  X( J, RHS ) ||_oo <= Overflow threshold
 *
 *  Once all columns of a block column have been rescaled (BLAS-1), the linear
 *  update is executed with GEMM without overflow.
 *
 *  To limit rescaling, local scale factors track the scaling of column segments.
 *  There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA
 *  per right-hand side column RHS = 1, ..., NRHS. The global scale factor
 *  SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS )
 *  I = 1, ..., NBA.
 *  A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS )
 *  updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The
 *  linear update of potentially inconsistently scaled vector segments
 *     s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) )
 *  computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and,
 *  if necessary, rescales the blocks prior to calling GEMM.
 *
 *  \endverbatim
 *  =====================================================================
 *  References:
 *  C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019).
 *  Parallel robust solution of triangular linear systems. Concurrency
 *  and Computation: Practice and Experience, 31(19), e5064.
 *
 *  Contributor:
 *   Angelika Schwarz, Umea University, Sweden.
 *
 *  =====================================================================
      SUBROUTINE SLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
     $                    X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
      IMPLICIT NONE
 *
 *     .. Scalar Arguments ..
      CHARACTER          DIAG, TRANS, NORMIN, UPLO
      INTEGER            INFO, LDA, LWORK, LDX, N, NRHS
 *     ..
 *     .. Array Arguments ..
      REAL               A( LDA, * ), CNORM( * ), X( LDX, * ),
     $                   SCALE( * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      REAL               ZERO, ONE
      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
      INTEGER            NBMAX, NBMIN, NBRHS, NRHSMIN
      PARAMETER          ( NRHSMIN = 2, NBRHS = 32 )
      PARAMETER          ( NBMIN = 8, NBMAX = 64 )
 *     ..
 *     .. Local Arrays ..
      REAL               W( NBMAX ), XNRM( NBRHS )
 *     ..
 *     .. Local Scalars ..
      LOGICAL            LQUERY, NOTRAN, NOUNIT, UPPER
      INTEGER            AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J,
     $                   JFIRST, JINC, JLAST, J1, J2, K, KK, K1, K2,
     $                   LANRM, LDS, LSCALE, NB, NBA, NBX, RHS
      REAL               ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC,
     $                   SCAMIN, SMLNUM, TMAX
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ILAENV
      REAL               SLAMCH, SLANGE, SLARMM
      EXTERNAL           ILAENV, LSAME, SLAMCH, SLANGE, SLARMM
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           SLATRS, SSCAL, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
      INFO = 0
      UPPER = LSAME( UPLO, 'U' )
      NOTRAN = LSAME( TRANS, 'N' )
      NOUNIT = LSAME( DIAG, 'N' )
      LQUERY = ( LWORK.EQ.-1 )
 *
 *     Partition A and X into blocks.
 *
      NB = MAX( 8, ILAENV( 1, 'SLATRS', '', N, N, -1, -1 ) )
      NB = MIN( NBMAX, NB )
      NBA = MAX( 1, (N + NB - 1) / NB )
      NBX = MAX( 1, (NRHS + NBRHS - 1) / NBRHS )
 *
 *     Compute the workspace
 *
 *     The workspace comprises two parts.
 *     The first part stores the local scale factors. Each simultaneously
 *     computed right-hand side requires one local scale factor per block
 *     row. WORK( I + KK * LDS ) is the scale factor of the vector
 *     segment associated with the I-th block row and the KK-th vector
 *     in the block column.
      LSCALE = NBA * MAX( NBA, MIN( NRHS, NBRHS ) )
      LDS = NBA
 *     The second part stores upper bounds of the triangular A. There are
 *     a total of NBA x NBA blocks, of which only the upper triangular
 *     part or the lower triangular part is referenced. The upper bound of
 *     the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ).
      LANRM = NBA * NBA
      AWRK = LSCALE
      WORK( 1 ) = LSCALE + LANRM
 *
 *     Test the input parameters.
 *
      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
         INFO = -1
      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
     $         LSAME( TRANS, 'C' ) ) THEN
         INFO = -2
      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
         INFO = -3
      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
     $         LSAME( NORMIN, 'N' ) ) THEN
         INFO = -4
      ELSE IF( N.LT.0 ) THEN
         INFO = -5
      ELSE IF( NRHS.LT.0 ) THEN
         INFO = -6
      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
         INFO = -8
      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
         INFO = -10
      ELSE IF( .NOT.LQUERY .AND. LWORK.LT.WORK( 1 ) ) THEN
         INFO = -14
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'SLATRS3', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
 *
 *     Initialize scaling factors
 *
      DO KK = 1, NRHS
         SCALE( KK ) = ONE
      END DO
 *
 *     Quick return if possible
 *
      IF( MIN( N, NRHS ).EQ.0 )
     $   RETURN
 *
 *     Determine machine dependent constant to control overflow.
 *
      BIGNUM = SLAMCH( 'Overflow' )
      SMLNUM = SLAMCH( 'Safe Minimum' )
 *
 *     Use unblocked code for small problems
 *
      IF( NRHS.LT.NRHSMIN ) THEN
         CALL SLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X( 1, 1),
     $                SCALE( 1 ), CNORM, INFO )
         DO K = 2, NRHS
            CALL SLATRS( UPLO, TRANS, DIAG, 'Y', N, A, LDA, X( 1, K ),
     $                   SCALE( K ), CNORM, INFO )
         END DO
         RETURN
      END IF
 *
 *     Compute norms of blocks of A excluding diagonal blocks and find
 *     the block with the largest norm TMAX.
 *
      TMAX = ZERO
      DO J = 1, NBA
         J1 = (J-1)*NB + 1
         J2 = MIN( J*NB, N ) + 1
         IF ( UPPER ) THEN
            IFIRST = 1
            ILAST = J - 1
         ELSE
            IFIRST = J + 1
            ILAST = NBA
         END IF
         DO I = IFIRST, ILAST
            I1 = (I-1)*NB + 1
            I2 = MIN( I*NB, N ) + 1
 *
 *           Compute upper bound of A( I1:I2-1, J1:J2-1 ).
 *
            IF( NOTRAN ) THEN
               ANRM = SLANGE( 'I', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
               WORK( AWRK + I+(J-1)*NBA ) = ANRM
            ELSE
               ANRM = SLANGE( '1', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
               WORK( AWRK + J+(I-1)*NBA ) = ANRM
            END IF
            TMAX = MAX( TMAX, ANRM )
         END DO
      END DO
 *
      IF( .NOT. TMAX.LE.SLAMCH('Overflow') ) THEN
 *
 *        Some matrix entries have huge absolute value. At least one upper
 *        bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point
 *        number, either due to overflow in LANGE or due to Inf in A.
 *        Fall back to LATRS. Set normin = 'N' for every right-hand side to
 *        force computation of TSCAL in LATRS to avoid the likely overflow
 *        in the computation of the column norms CNORM.
 *
         DO K = 1, NRHS
            CALL SLATRS( UPLO, TRANS, DIAG, 'N', N, A, LDA, X( 1, K ),
     $                   SCALE( K ), CNORM, INFO )
         END DO
         RETURN
      END IF
 *
 *     Every right-hand side requires workspace to store NBA local scale
 *     factors. To save workspace, X is computed successively in block columns
 *     of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient
 *     workspace is available, larger values of NBRHS or NBRHS = NRHS are viable.
      DO K = 1, NBX
 *        Loop over block columns (index = K) of X and, for column-wise scalings,
 *        over individual columns (index = KK).
 *        K1: column index of the first column in X( J, K )
 *        K2: column index of the first column in X( J, K+1 )
 *        so the K2 - K1 is the column count of the block X( J, K )
         K1 = (K-1)*NBRHS + 1
         K2 = MIN( K*NBRHS, NRHS ) + 1
 *
 *        Initialize local scaling factors of current block column X( J, K )
 *
         DO KK = 1, K2 - K1
            DO I = 1, NBA
               WORK( I+KK*LDS ) = ONE
            END DO
         END DO
 *
         IF( NOTRAN ) THEN
 *
 *           Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
 *
            IF( UPPER ) THEN
               JFIRST = NBA
               JLAST = 1
               JINC = -1
            ELSE
               JFIRST = 1
               JLAST = NBA
               JINC = 1
            END IF
         ELSE
 *
 *           Solve A**T * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
 *
            IF( UPPER ) THEN
               JFIRST = 1
               JLAST = NBA
               JINC = 1
            ELSE
               JFIRST = NBA
               JLAST = 1
               JINC = -1
            END IF
         END IF
 *
         DO J = JFIRST, JLAST, JINC
 *           J1: row index of the first row in A( J, J )
 *           J2: row index of the first row in A( J+1, J+1 )
 *           so that J2 - J1 is the row count of the block A( J, J )
            J1 = (J-1)*NB + 1
            J2 = MIN( J*NB, N ) + 1
 *
 *           Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS )
 *           for all right-hand sides in the current block column,
 *           one RHS at a time.
 *
            DO KK = 1, K2-K1
               RHS = K1 + KK - 1
               IF( KK.EQ.1 ) THEN
                  CALL SLATRS( UPLO, TRANS, DIAG, 'N', J2-J1,
     $                         A( J1, J1 ), LDA, X( J1, RHS ),
     $                         SCALOC, CNORM, INFO )
               ELSE
                  CALL SLATRS( UPLO, TRANS, DIAG, 'Y', J2-J1,
     $                         A( J1, J1 ), LDA, X( J1, RHS ),
     $                         SCALOC, CNORM, INFO )
               END IF
 *              Find largest absolute value entry in the vector segment
 *              X( J1:J2-1, RHS ) as an upper bound for the worst case
 *              growth in the linear updates.
               XNRM( KK ) = SLANGE( 'I', J2-J1, 1, X( J1, RHS ),
     $                              LDX, W )
 *
               IF( SCALOC .EQ. ZERO ) THEN
 *                 LATRS found that A is singular through A(j,j) = 0.
 *                 Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0
 *                 and compute A*x = 0 (or A**T*x = 0). Note that
 *                 X(J1:J2-1, KK) is set by LATRS.
                  SCALE( RHS ) = ZERO
                  DO II = 1, J1-1
                     X( II, KK ) = ZERO
                  END DO
                  DO II = J2, N
                     X( II, KK ) = ZERO
                  END DO
 *                 Discard the local scale factors.
                  DO II = 1, NBA
                     WORK( II+KK*LDS ) = ONE
                  END DO
                  SCALOC = ONE
               ELSE IF( SCALOC*WORK( J+KK*LDS ) .EQ. ZERO ) THEN
 *                 LATRS computed a valid scale factor, but combined with
 *                 the current scaling the solution does not have a
 *                 scale factor > 0.
 *
 *                 Set WORK( J+KK*LDS ) to smallest valid scale
 *                 factor and increase SCALOC accordingly.
                  SCAL = WORK( J+KK*LDS ) / SMLNUM
                  SCALOC = SCALOC * SCAL
                  WORK( J+KK*LDS ) = SMLNUM
 *                 If LATRS overestimated the growth, x may be
 *                 rescaled to preserve a valid combined scale
 *                 factor WORK( J, KK ) > 0.
                  RSCAL = ONE / SCALOC
                  IF( XNRM( KK )*RSCAL .LE. BIGNUM ) THEN
                     XNRM( KK ) = XNRM( KK ) * RSCAL
                     CALL SSCAL( J2-J1, RSCAL, X( J1, RHS ), 1 )
                     SCALOC = ONE
                  ELSE
 *                    The system op(A) * x = b is badly scaled and its
 *                    solution cannot be represented as (1/scale) * x.
 *                    Set x to zero. This approach deviates from LATRS
 *                    where a completely meaningless non-zero vector
 *                    is returned that is not a solution to op(A) * x = b.
                     SCALE( RHS ) = ZERO
                     DO II = 1, N
                        X( II, KK ) = ZERO
                     END DO
 *                    Discard the local scale factors.
                     DO II = 1, NBA
                        WORK( II+KK*LDS ) = ONE
                     END DO
                     SCALOC = ONE
                  END IF
               END IF
               SCALOC = SCALOC * WORK( J+KK*LDS )
               WORK( J+KK*LDS ) = SCALOC
            END DO
 *
 *           Linear block updates
 *
            IF( NOTRAN ) THEN
               IF( UPPER ) THEN
                  IFIRST = J - 1
                  ILAST = 1
                  IINC = -1
               ELSE
                  IFIRST = J + 1
                  ILAST = NBA
                  IINC = 1
               END IF
            ELSE
               IF( UPPER ) THEN
                  IFIRST = J + 1
                  ILAST = NBA
                  IINC = 1
               ELSE
                  IFIRST = J - 1
                  ILAST = 1
                  IINC = -1
               END IF
            END IF
 *
            DO I = IFIRST, ILAST, IINC
 *              I1: row index of the first column in X( I, K )
 *              I2: row index of the first column in X( I+1, K )
 *              so the I2 - I1 is the row count of the block X( I, K )
               I1 = (I-1)*NB + 1
               I2 = MIN( I*NB, N ) + 1
 *
 *              Prepare the linear update to be executed with GEMM.
 *              For each column, compute a consistent scaling, a
 *              scaling factor to survive the linear update, and
 *              rescale the column segments, if necesssary. Then
 *              the linear update is safely executed.
 *
               DO KK = 1, K2-K1
                  RHS = K1 + KK - 1
 *                 Compute consistent scaling
                  SCAMIN = MIN( WORK( I+KK*LDS), WORK( J+KK*LDS ) )
 *
 *                 Compute scaling factor to survive the linear update
 *                 simulating consistent scaling.
 *
                  BNRM = SLANGE( 'I', I2-I1, 1, X( I1, RHS ), LDX, W )
                  BNRM = BNRM*( SCAMIN / WORK( I+KK*LDS ) )
                  XNRM( KK ) = XNRM( KK )*(SCAMIN / WORK( J+KK*LDS ))
                  ANRM = WORK( AWRK + I+(J-1)*NBA )
                  SCALOC = SLARMM( ANRM, XNRM( KK ), BNRM )
 *
 *                 Simultaneously apply the robust update factor and the
 *                 consistency scaling factor to B( I, KK ) and B( J, KK ).
 *
                  SCAL = ( SCAMIN / WORK( I+KK*LDS) )*SCALOC
                  IF( SCAL.NE.ONE ) THEN
                     CALL SSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
                     WORK( I+KK*LDS ) = SCAMIN*SCALOC
                  END IF
 *
                  SCAL = ( SCAMIN / WORK( J+KK*LDS ) )*SCALOC
                  IF( SCAL.NE.ONE ) THEN
                     CALL SSCAL( J2-J1, SCAL, X( J1, RHS ), 1 )
                     WORK( J+KK*LDS ) = SCAMIN*SCALOC
                  END IF
               END DO
 *
               IF( NOTRAN ) THEN
 *
 *                 B( I, K ) := B( I, K ) - A( I, J ) * X( J, K )
 *
                  CALL SGEMM( 'N', 'N', I2-I1, K2-K1, J2-J1, -ONE,
     $                        A( I1, J1 ), LDA, X( J1, K1 ), LDX,
     $                        ONE, X( I1, K1 ), LDX )
               ELSE
 *
 *                 B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K )
 *
                  CALL SGEMM( 'T', 'N', I2-I1, K2-K1, J2-J1, -ONE,
     $                        A( J1, I1 ), LDA, X( J1, K1 ), LDX,
     $                        ONE, X( I1, K1 ), LDX )
               END IF
            END DO
         END DO
 *
 *        Reduce local scaling factors
 *
         DO KK = 1, K2-K1
            RHS = K1 + KK - 1
            DO I = 1, NBA
               SCALE( RHS ) = MIN( SCALE( RHS ), WORK( I+KK*LDS ) )
            END DO
         END DO
 *
 *        Realize consistent scaling
 *
         DO KK = 1, K2-K1
            RHS = K1 + KK - 1
            IF( SCALE( RHS ).NE.ONE .AND. SCALE( RHS ).NE. ZERO ) THEN
               DO I = 1, NBA
                  I1 = (I-1)*NB + 1
                  I2 = MIN( I*NB, N ) + 1
                  SCAL = SCALE( RHS ) / WORK( I+KK*LDS )
                  IF( SCAL.NE.ONE )
     $               CALL SSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
               END DO
            END IF
         END DO
      END DO
      RETURN
 *
 *     End of SLATRS3
 *
      END
--- a/lapack-netlib/SRC/strsyl3.c
+++ b/lapack-netlib/SRC/strsyl3.c
--- a/lapack-netlib/SRC/strsyl3.f
+++ b/lapack-netlib/SRC/strsyl3.f
--- a/lapack-netlib/SRC/zlatrs3.c
+++ b/lapack-netlib/SRC/zlatrs3.c
--- a/lapack-netlib/SRC/zlatrs3.f
+++ b/lapack-netlib/SRC/zlatrs3.f
@@ -0,0 +1,667 @@
 *> \brief \b ZLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow.
 *
 *  Definition:
 *  ===========
 *
 *      SUBROUTINE ZLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
 *                          X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          DIAG, NORMIN, TRANS, UPLO
 *       INTEGER            INFO, LDA, LWORK, LDX, N, NRHS
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   CNORM( * ), SCALE( * ), WORK( * )
 *       COMPLEX*16         A( LDA, * ), X( LDX, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> ZLATRS3 solves one of the triangular systems
 *>
 *>    A * X = B * diag(scale),  A**T * X = B * diag(scale), or
 *>    A**H * X = B * diag(scale)
 *>
 *> with scaling to prevent overflow.  Here A is an upper or lower
 *> triangular matrix, A**T denotes the transpose of A, A**H denotes the
 *> conjugate transpose of A. X and B are n-by-nrhs matrices and scale
 *> is an nrhs-element vector of scaling factors. A scaling factor scale(j)
 *> is usually less than or equal to 1, chosen such that X(:,j) is less
 *> than the overflow threshold. If the matrix A is singular (A(j,j) = 0
 *> for some j), then a non-trivial solution to A*X = 0 is returned. If
 *> the system is so badly scaled that the solution cannot be represented
 *> as (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned.
 *>
 *> This is a BLAS-3 version of LATRS for solving several right
 *> hand sides simultaneously.
 *>
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] UPLO
 *> \verbatim
 *>          UPLO is CHARACTER*1
 *>          Specifies whether the matrix A is upper or lower triangular.
 *>          = 'U':  Upper triangular
 *>          = 'L':  Lower triangular
 *> \endverbatim
 *>
 *> \param[in] TRANS
 *> \verbatim
 *>          TRANS is CHARACTER*1
 *>          Specifies the operation applied to A.
 *>          = 'N':  Solve A * x = s*b  (No transpose)
 *>          = 'T':  Solve A**T* x = s*b  (Transpose)
 *>          = 'C':  Solve A**T* x = s*b  (Conjugate transpose)
 *> \endverbatim
 *>
 *> \param[in] DIAG
 *> \verbatim
 *>          DIAG is CHARACTER*1
 *>          Specifies whether or not the matrix A is unit triangular.
 *>          = 'N':  Non-unit triangular
 *>          = 'U':  Unit triangular
 *> \endverbatim
 *>
 *> \param[in] NORMIN
 *> \verbatim
 *>          NORMIN is CHARACTER*1
 *>          Specifies whether CNORM has been set or not.
 *>          = 'Y':  CNORM contains the column norms on entry
 *>          = 'N':  CNORM is not set on entry.  On exit, the norms will
 *>                  be computed and stored in CNORM.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The order of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] NRHS
 *> \verbatim
 *>          NRHS is INTEGER
 *>          The number of columns of X.  NRHS >= 0.
 *> \endverbatim
 *>
 *> \param[in] A
 *> \verbatim
 *>          A is COMPLEX*16 array, dimension (LDA,N)
 *>          The triangular matrix A.  If UPLO = 'U', the leading n by n
 *>          upper triangular part of the array A contains the upper
 *>          triangular matrix, and the strictly lower triangular part of
 *>          A is not referenced.  If UPLO = 'L', the leading n by n lower
 *>          triangular part of the array A contains the lower triangular
 *>          matrix, and the strictly upper triangular part of A is not
 *>          referenced.  If DIAG = 'U', the diagonal elements of A are
 *>          also not referenced and are assumed to be 1.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max (1,N).
 *> \endverbatim
 *>
 *> \param[in,out] X
 *> \verbatim
 *>          X is COMPLEX*16 array, dimension (LDX,NRHS)
 *>          On entry, the right hand side B of the triangular system.
 *>          On exit, X is overwritten by the solution matrix X.
 *> \endverbatim
 *>
 *> \param[in] LDX
 *> \verbatim
 *>          LDX is INTEGER
 *>          The leading dimension of the array X.  LDX >= max (1,N).
 *> \endverbatim
 *>
 *> \param[out] SCALE
 *> \verbatim
 *>          SCALE is DOUBLE PRECISION array, dimension (NRHS)
 *>          The scaling factor s(k) is for the triangular system
 *>          A * x(:,k) = s(k)*b(:,k)  or  A**T* x(:,k) = s(k)*b(:,k).
 *>          If SCALE = 0, the matrix A is singular or badly scaled.
 *>          If A(j,j) = 0 is encountered, a non-trivial vector x(:,k)
 *>          that is an exact or approximate solution to A*x(:,k) = 0
 *>          is returned. If the system so badly scaled that solution
 *>          cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0
 *>          is returned.
 *> \endverbatim
 *>
 *> \param[in,out] CNORM
 *> \verbatim
 *>          CNORM is DOUBLE PRECISION array, dimension (N)
 *>
 *>          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
 *>          contains the norm of the off-diagonal part of the j-th column
 *>          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
 *>          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
 *>          must be greater than or equal to the 1-norm.
 *>
 *>          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
 *>          returns the 1-norm of the offdiagonal part of the j-th column
 *>          of A.
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          WORK is DOUBLE PRECISION array, dimension (LWORK).
 *>          On exit, if INFO = 0, WORK(1) returns the optimal size of
 *>          WORK.
 *> \endverbatim
 *>
 *> \param[in] LWORK
 *>          LWORK is INTEGER
 *>          LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where
 *>          NBA = (N + NB - 1)/NB and NB is the optimal block size.
 *>
 *>          If LWORK = -1, then a workspace query is assumed; the routine
 *>          only calculates the optimal dimensions of the WORK array, returns
 *>          this value as the first entry of the WORK array, and no error
 *>          message related to LWORK is issued by XERBLA.
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -k, the k-th argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleOTHERauxiliary
 *> \par Further Details:
 *  =====================
 *  \verbatim
 *  The algorithm follows the structure of a block triangular solve.
 *  The diagonal block is solved with a call to the robust the triangular
 *  solver LATRS for every right-hand side RHS = 1, ..., NRHS
 *     op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ),
 *  where op( A ) = A or op( A ) = A**T or op( A ) = A**H.
 *  The linear block updates operate on block columns of X,
 *     B( I, K ) - op(A( I, J )) * X( J, K )
 *  and use GEMM. To avoid overflow in the linear block update, the worst case
 *  growth is estimated. For every RHS, a scale factor s <= 1.0 is computed
 *  such that
 *     || s * B( I, RHS )||_oo
 *   + || op(A( I, J )) ||_oo * || s *  X( J, RHS ) ||_oo <= Overflow threshold
 *
 *  Once all columns of a block column have been rescaled (BLAS-1), the linear
 *  update is executed with GEMM without overflow.
 *
 *  To limit rescaling, local scale factors track the scaling of column segments.
 *  There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA
 *  per right-hand side column RHS = 1, ..., NRHS. The global scale factor
 *  SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS )
 *  I = 1, ..., NBA.
 *  A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS )
 *  updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The
 *  linear update of potentially inconsistently scaled vector segments
 *     s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) )
 *  computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and,
 *  if necessary, rescales the blocks prior to calling GEMM.
 *
 *  \endverbatim
 *  =====================================================================
 *  References:
 *  C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019).
 *  Parallel robust solution of triangular linear systems. Concurrency
 *  and Computation: Practice and Experience, 31(19), e5064.
 *
 *  Contributor:
 *   Angelika Schwarz, Umea University, Sweden.
 *
 *  =====================================================================
      SUBROUTINE ZLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
     $                    X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
      IMPLICIT NONE
 *
 *     .. Scalar Arguments ..
      CHARACTER          DIAG, TRANS, NORMIN, UPLO
      INTEGER            INFO, LDA, LWORK, LDX, N, NRHS
 *     ..
 *     .. Array Arguments ..
      COMPLEX*16         A( LDA, * ), X( LDX, * )
      DOUBLE PRECISION   CNORM( * ), SCALE( * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE
      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
      COMPLEX*16         CZERO, CONE
      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ) )
      INTEGER            NBMAX, NBMIN, NBRHS, NRHSMIN
      PARAMETER          ( NRHSMIN = 2, NBRHS = 32 )
      PARAMETER          ( NBMIN = 8, NBMAX = 64 )
 *     ..
 *     .. Local Arrays ..
      DOUBLE PRECISION   W( NBMAX ), XNRM( NBRHS )
 *     ..
 *     .. Local Scalars ..
      LOGICAL            LQUERY, NOTRAN, NOUNIT, UPPER
      INTEGER            AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J,
     $                   JFIRST, JINC, JLAST, J1, J2, K, KK, K1, K2,
     $                   LANRM, LDS, LSCALE, NB, NBA, NBX, RHS
      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC,
     $                   SCAMIN, SMLNUM, TMAX
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ILAENV
      DOUBLE PRECISION   DLAMCH, ZLANGE, DLARMM
      EXTERNAL           ILAENV, LSAME, DLAMCH, ZLANGE, DLARMM
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           ZLATRS, ZDSCAL, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
      INFO = 0
      UPPER = LSAME( UPLO, 'U' )
      NOTRAN = LSAME( TRANS, 'N' )
      NOUNIT = LSAME( DIAG, 'N' )
      LQUERY = ( LWORK.EQ.-1 )
 *
 *     Partition A and X into blocks.
 *
      NB = MAX( NBMIN, ILAENV( 1, 'ZLATRS', '', N, N, -1, -1 ) )
      NB = MIN( NBMAX, NB )
      NBA = MAX( 1, (N + NB - 1) / NB )
      NBX = MAX( 1, (NRHS + NBRHS - 1) / NBRHS )
 *
 *     Compute the workspace
 *
 *     The workspace comprises two parts.
 *     The first part stores the local scale factors. Each simultaneously
 *     computed right-hand side requires one local scale factor per block
 *     row. WORK( I + KK * LDS ) is the scale factor of the vector
 *     segment associated with the I-th block row and the KK-th vector
 *     in the block column.
      LSCALE = NBA * MAX( NBA, MIN( NRHS, NBRHS ) )
      LDS = NBA
 *     The second part stores upper bounds of the triangular A. There are
 *     a total of NBA x NBA blocks, of which only the upper triangular
 *     part or the lower triangular part is referenced. The upper bound of
 *     the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ).
      LANRM = NBA * NBA
      AWRK = LSCALE
      WORK( 1 ) = LSCALE + LANRM
 *
 *     Test the input parameters.
 *
      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
         INFO = -1
      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
     $         LSAME( TRANS, 'C' ) ) THEN
         INFO = -2
      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
         INFO = -3
      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
     $         LSAME( NORMIN, 'N' ) ) THEN
         INFO = -4
      ELSE IF( N.LT.0 ) THEN
         INFO = -5
      ELSE IF( NRHS.LT.0 ) THEN
         INFO = -6
      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
         INFO = -8
      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
         INFO = -10
      ELSE IF( .NOT.LQUERY .AND. LWORK.LT.WORK( 1 ) ) THEN
         INFO = -14
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'ZLATRS3', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
 *
 *     Initialize scaling factors
 *
      DO KK = 1, NRHS
         SCALE( KK ) = ONE
      END DO
 *
 *     Quick return if possible
 *
      IF( MIN( N, NRHS ).EQ.0 )
     $   RETURN
 *
 *     Determine machine dependent constant to control overflow.
 *
      BIGNUM = DLAMCH( 'Overflow' )
      SMLNUM = DLAMCH( 'Safe Minimum' )
 *
 *     Use unblocked code for small problems
 *
      IF( NRHS.LT.NRHSMIN ) THEN
         CALL ZLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X( 1, 1),
     $                SCALE( 1 ), CNORM, INFO )
         DO K = 2, NRHS
            CALL ZLATRS( UPLO, TRANS, DIAG, 'Y', N, A, LDA, X( 1, K ),
     $                   SCALE( K ), CNORM, INFO )
         END DO
         RETURN
      END IF
 *
 *     Compute norms of blocks of A excluding diagonal blocks and find
 *     the block with the largest norm TMAX.
 *
      TMAX = ZERO
      DO J = 1, NBA
         J1 = (J-1)*NB + 1
         J2 = MIN( J*NB, N ) + 1
         IF ( UPPER ) THEN
            IFIRST = 1
            ILAST = J - 1
         ELSE
            IFIRST = J + 1
            ILAST = NBA
         END IF
         DO I = IFIRST, ILAST
            I1 = (I-1)*NB + 1
            I2 = MIN( I*NB, N ) + 1
 *
 *           Compute upper bound of A( I1:I2-1, J1:J2-1 ).
 *
            IF( NOTRAN ) THEN
               ANRM = ZLANGE( 'I', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
               WORK( AWRK + I+(J-1)*NBA ) = ANRM
            ELSE
               ANRM = ZLANGE( '1', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
               WORK( AWRK + J+(I-1) * NBA ) = ANRM
            END IF
            TMAX = MAX( TMAX, ANRM )
         END DO
      END DO
 *
      IF( .NOT. TMAX.LE.DLAMCH('Overflow') ) THEN
 *
 *        Some matrix entries have huge absolute value. At least one upper
 *        bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point
 *        number, either due to overflow in LANGE or due to Inf in A.
 *        Fall back to LATRS. Set normin = 'N' for every right-hand side to
 *        force computation of TSCAL in LATRS to avoid the likely overflow
 *        in the computation of the column norms CNORM.
 *
         DO K = 1, NRHS
            CALL ZLATRS( UPLO, TRANS, DIAG, 'N', N, A, LDA, X( 1, K ),
     $                   SCALE( K ), CNORM, INFO )
         END DO
         RETURN
      END IF
 *
 *     Every right-hand side requires workspace to store NBA local scale
 *     factors. To save workspace, X is computed successively in block columns
 *     of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient
 *     workspace is available, larger values of NBRHS or NBRHS = NRHS are viable.
      DO K = 1, NBX
 *        Loop over block columns (index = K) of X and, for column-wise scalings,
 *        over individual columns (index = KK).
 *        K1: column index of the first column in X( J, K )
 *        K2: column index of the first column in X( J, K+1 )
 *        so the K2 - K1 is the column count of the block X( J, K )
         K1 = (K-1)*NBRHS + 1
         K2 = MIN( K*NBRHS, NRHS ) + 1
 *
 *        Initialize local scaling factors of current block column X( J, K )
 *
         DO KK = 1, K2 - K1
            DO I = 1, NBA
               WORK( I+KK*LDS ) = ONE
            END DO
         END DO
 *
         IF( NOTRAN ) THEN
 *
 *           Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
 *
            IF( UPPER ) THEN
               JFIRST = NBA
               JLAST = 1
               JINC = -1
            ELSE
               JFIRST = 1
               JLAST = NBA
               JINC = 1
            END IF
         ELSE
 *
 *           Solve op(A) * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
 *           where op(A) = A**T or op(A) = A**H
 *
            IF( UPPER ) THEN
               JFIRST = 1
               JLAST = NBA
               JINC = 1
            ELSE
               JFIRST = NBA
               JLAST = 1
               JINC = -1
            END IF
         END IF
         DO J = JFIRST, JLAST, JINC
 *           J1: row index of the first row in A( J, J )
 *           J2: row index of the first row in A( J+1, J+1 )
 *           so that J2 - J1 is the row count of the block A( J, J )
            J1 = (J-1)*NB + 1
            J2 = MIN( J*NB, N ) + 1
 *
 *           Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS )
 *
            DO KK = 1, K2 - K1
               RHS = K1 + KK - 1
               IF( KK.EQ.1 ) THEN
                  CALL ZLATRS( UPLO, TRANS, DIAG, 'N', J2-J1,
     $                         A( J1, J1 ), LDA, X( J1, RHS ),
     $                         SCALOC, CNORM, INFO )
               ELSE
                  CALL ZLATRS( UPLO, TRANS, DIAG, 'Y', J2-J1,
     $                         A( J1, J1 ), LDA, X( J1, RHS ),
     $                         SCALOC, CNORM, INFO )
               END IF
 *              Find largest absolute value entry in the vector segment
 *              X( J1:J2-1, RHS ) as an upper bound for the worst case
 *              growth in the linear updates.
               XNRM( KK ) = ZLANGE( 'I', J2-J1, 1, X( J1, RHS ),
     $                              LDX, W )
 *
               IF( SCALOC .EQ. ZERO ) THEN
 *                 LATRS found that A is singular through A(j,j) = 0.
 *                 Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0
 *                 and compute op(A)*x = 0. Note that X(J1:J2-1, KK) is
 *                 set by LATRS.
                  SCALE( RHS ) = ZERO
                  DO II = 1, J1-1
                     X( II, KK ) = CZERO
                  END DO
                  DO II = J2, N
                     X( II, KK ) = CZERO
                  END DO
 *                 Discard the local scale factors.
                  DO II = 1, NBA
                     WORK( II+KK*LDS ) = ONE
                  END DO
                  SCALOC = ONE
               ELSE IF( SCALOC*WORK( J+KK*LDS ) .EQ. ZERO ) THEN
 *                 LATRS computed a valid scale factor, but combined with
 *                 the current scaling the solution does not have a
 *                 scale factor > 0.
 *
 *                 Set WORK( J+KK*LDS ) to smallest valid scale
 *                 factor and increase SCALOC accordingly.
                  SCAL = WORK( J+KK*LDS ) / SMLNUM
                  SCALOC = SCALOC * SCAL
                  WORK( J+KK*LDS ) = SMLNUM
 *                 If LATRS overestimated the growth, x may be
 *                 rescaled to preserve a valid combined scale
 *                 factor WORK( J, KK ) > 0.
                  RSCAL = ONE / SCALOC
                  IF( XNRM( KK )*RSCAL .LE. BIGNUM ) THEN
                     XNRM( KK ) = XNRM( KK ) * RSCAL
                     CALL ZDSCAL( J2-J1, RSCAL, X( J1, RHS ), 1 )
                     SCALOC = ONE
                  ELSE
 *                    The system op(A) * x = b is badly scaled and its
 *                    solution cannot be represented as (1/scale) * x.
 *                    Set x to zero. This approach deviates from LATRS
 *                    where a completely meaningless non-zero vector
 *                    is returned that is not a solution to op(A) * x = b.
                     SCALE( RHS ) = ZERO
                     DO II = 1, N
                        X( II, KK ) = CZERO
                     END DO
 *                    Discard the local scale factors.
                     DO II = 1, NBA
                        WORK( II+KK*LDS ) = ONE
                     END DO
                     SCALOC = ONE
                  END IF
               END IF
               SCALOC = SCALOC * WORK( J+KK*LDS )
               WORK( J+KK*LDS ) = SCALOC
            END DO
 *
 *           Linear block updates
 *
            IF( NOTRAN ) THEN
               IF( UPPER ) THEN
                  IFIRST = J - 1
                  ILAST = 1
                  IINC = -1
               ELSE
                  IFIRST = J + 1
                  ILAST = NBA
                  IINC = 1
               END IF
            ELSE
               IF( UPPER ) THEN
                  IFIRST = J + 1
                  ILAST = NBA
                  IINC = 1
               ELSE
                  IFIRST = J - 1
                  ILAST = 1
                  IINC = -1
               END IF
            END IF
 *
            DO I = IFIRST, ILAST, IINC
 *              I1: row index of the first column in X( I, K )
 *              I2: row index of the first column in X( I+1, K )
 *              so the I2 - I1 is the row count of the block X( I, K )
               I1 = (I-1)*NB + 1
               I2 = MIN( I*NB, N ) + 1
 *
 *              Prepare the linear update to be executed with GEMM.
 *              For each column, compute a consistent scaling, a
 *              scaling factor to survive the linear update, and
 *              rescale the column segments, if necesssary. Then
 *              the linear update is safely executed.
 *
               DO KK = 1, K2 - K1
                  RHS = K1 + KK - 1
 *                 Compute consistent scaling
                  SCAMIN = MIN( WORK( I+KK*LDS), WORK( J+KK*LDS ) )
 *
 *                 Compute scaling factor to survive the linear update
 *                 simulating consistent scaling.
 *
                  BNRM = ZLANGE( 'I', I2-I1, 1, X( I1, RHS ), LDX, W )
                  BNRM = BNRM*( SCAMIN / WORK( I+KK*LDS ) )
                  XNRM( KK ) = XNRM( KK )*( SCAMIN / WORK( J+KK*LDS) )
                  ANRM = WORK( AWRK + I+(J-1)*NBA )
                  SCALOC = DLARMM( ANRM, XNRM( KK ), BNRM )
 *
 *                 Simultaneously apply the robust update factor and the
 *                 consistency scaling factor to X( I, KK ) and X( J, KK ).
 *
                  SCAL = ( SCAMIN / WORK( I+KK*LDS) )*SCALOC
                  IF( SCAL.NE.ONE ) THEN
                     CALL ZDSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
                     WORK( I+KK*LDS ) = SCAMIN*SCALOC
                  END IF
 *
                  SCAL = ( SCAMIN / WORK( J+KK*LDS ) )*SCALOC
                  IF( SCAL.NE.ONE ) THEN
                     CALL ZDSCAL( J2-J1, SCAL, X( J1, RHS ), 1 )
                     WORK( J+KK*LDS ) = SCAMIN*SCALOC
                  END IF
               END DO
 *
               IF( NOTRAN ) THEN
 *
 *                 B( I, K ) := B( I, K ) - A( I, J ) * X( J, K )
 *
                  CALL ZGEMM( 'N', 'N', I2-I1, K2-K1, J2-J1, -CONE,
     $                        A( I1, J1 ), LDA, X( J1, K1 ), LDX,
     $                        CONE, X( I1, K1 ), LDX )
               ELSE IF( LSAME( TRANS, 'T' ) ) THEN
 *
 *                 B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K )
 *
                  CALL ZGEMM( 'T', 'N', I2-I1, K2-K1, J2-J1, -CONE,
     $                        A( J1, I1 ), LDA, X( J1, K1 ), LDX,
     $                        CONE, X( I1, K1 ), LDX )
               ELSE
 *
 *                 B( I, K ) := B( I, K ) - A( I, J )**H * X( J, K )
 *
                  CALL ZGEMM( 'C', 'N', I2-I1, K2-K1, J2-J1, -CONE,
     $                        A( J1, I1 ), LDA, X( J1, K1 ), LDX,
     $                        CONE, X( I1, K1 ), LDX )
               END IF
            END DO
         END DO
 *
 *        Reduce local scaling factors
 *
         DO KK = 1, K2 - K1
            RHS = K1 + KK - 1
            DO I = 1, NBA
               SCALE( RHS ) = MIN( SCALE( RHS ), WORK( I+KK*LDS ) )
            END DO
         END DO
 *
 *        Realize consistent scaling
 *
         DO KK = 1, K2 - K1
            RHS = K1 + KK - 1
            IF( SCALE( RHS ).NE.ONE .AND. SCALE( RHS ).NE. ZERO ) THEN
               DO I = 1, NBA
                  I1 = (I - 1) * NB + 1
                  I2 = MIN( I * NB, N ) + 1
                  SCAL = SCALE( RHS ) / WORK( I+KK*LDS )
                  IF( SCAL.NE.ONE )
     $               CALL ZDSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
               END DO
            END IF
         END DO
      END DO
      RETURN
 *
 *     End of ZLATRS3
 *
      END
--- a/lapack-netlib/SRC/ztrsyl3.c
+++ b/lapack-netlib/SRC/ztrsyl3.c
--- a/lapack-netlib/SRC/ztrsyl3.f
+++ b/lapack-netlib/SRC/ztrsyl3.f
--- a/lapack-netlib/TESTING/EIG/CMakeLists.txt
+++ b/lapack-netlib/TESTING/EIG/CMakeLists.txt
@@ -40,7 +40,7 @@ set(SEIGTST schkee.F
   sget54.f sglmts.f sgqrts.f sgrqts.f sgsvts3.f
   shst01.f slarfy.f slarhs.f slatm4.f slctes.f slctsx.f slsets.f sort01.f
   sort03.f ssbt21.f ssgt01.f sslect.f sspt21.f sstt21.f
   sstt22.f ssyt21.f ssyt22.f)
   sstt22.f ssyl01.f ssyt21.f ssyt22.f)
 set(CEIGTST cchkee.F
   cbdt01.f cbdt02.f cbdt03.f cbdt05.f
@@ -56,7 +56,7 @@ set(CEIGTST cchkee.F
   cget54.f cglmts.f cgqrts.f cgrqts.f cgsvts3.f
   chbt21.f chet21.f chet22.f chpt21.f chst01.f
   clarfy.f clarhs.f clatm4.f clctes.f clctsx.f clsets.f csbmv.f
   csgt01.f cslect.f
   csgt01.f cslect.f csyl01.f
   cstt21.f cstt22.f cunt01.f cunt03.f)
 set(DZIGTST dlafts.f dlahd2.f dlasum.f dlatb9.f dstech.f dstect.f
@@ -77,7 +77,7 @@ set(DEIGTST dchkee.F
   dget54.f dglmts.f dgqrts.f dgrqts.f dgsvts3.f
   dhst01.f dlarfy.f dlarhs.f dlatm4.f dlctes.f dlctsx.f dlsets.f dort01.f
   dort03.f dsbt21.f dsgt01.f dslect.f dspt21.f dstt21.f
   dstt22.f dsyt21.f dsyt22.f)
   dstt22.f dsyl01.f dsyt21.f dsyt22.f)
 set(ZEIGTST zchkee.F
   zbdt01.f zbdt02.f zbdt03.f zbdt05.f
@@ -93,7 +93,7 @@ set(ZEIGTST zchkee.F
   zget54.f zglmts.f zgqrts.f zgrqts.f zgsvts3.f
   zhbt21.f zhet21.f zhet22.f zhpt21.f zhst01.f
   zlarfy.f zlarhs.f zlatm4.f zlctes.f zlctsx.f zlsets.f zsbmv.f
   zsgt01.f zslect.f
   zsgt01.f zslect.f zsyl01.f
   zstt21.f zstt22.f zunt01.f zunt03.f)
 macro(add_eig_executable name)
--- a/lapack-netlib/TESTING/EIG/Makefile
+++ b/lapack-netlib/TESTING/EIG/Makefile
@@ -62,7 +62,7 @@ SEIGTST = schkee.o \
   sget54.o sglmts.o sgqrts.o sgrqts.o sgsvts3.o \
   shst01.o slarfy.o slarhs.o slatm4.o slctes.o slctsx.o slsets.o sort01.o \
   sort03.o ssbt21.o ssgt01.o sslect.o sspt21.o sstt21.o \
   sstt22.o ssyt21.o ssyt22.o
   sstt22.o ssyl01.o ssyt21.o ssyt22.o
 CEIGTST = cchkee.o \
   cbdt01.o cbdt02.o cbdt03.o cbdt05.o \
@@ -78,7 +78,7 @@ CEIGTST = cchkee.o \
   cget54.o cglmts.o cgqrts.o cgrqts.o cgsvts3.o \
   chbt21.o chet21.o chet22.o chpt21.o chst01.o \
   clarfy.o clarhs.o clatm4.o clctes.o clctsx.o clsets.o csbmv.o \
   csgt01.o cslect.o \
   csgt01.o cslect.o csyl01.o\
   cstt21.o cstt22.o cunt01.o cunt03.o
 DZIGTST = dlafts.o dlahd2.o dlasum.o dlatb9.o dstech.o dstect.o \
@@ -99,7 +99,7 @@ DEIGTST = dchkee.o \
   dget54.o dglmts.o dgqrts.o dgrqts.o dgsvts3.o \
   dhst01.o dlarfy.o dlarhs.o dlatm4.o dlctes.o dlctsx.o dlsets.o dort01.o \
   dort03.o dsbt21.o dsgt01.o dslect.o dspt21.o dstt21.o \
   dstt22.o dsyt21.o dsyt22.o
   dstt22.o dsyl01.o dsyt21.o dsyt22.o
 ZEIGTST = zchkee.o \
   zbdt01.o zbdt02.o zbdt03.o zbdt05.o \
@@ -115,7 +115,7 @@ ZEIGTST = zchkee.o \
   zget54.o zglmts.o zgqrts.o zgrqts.o zgsvts3.o \
   zhbt21.o zhet21.o zhet22.o zhpt21.o zhst01.o \
   zlarfy.o zlarhs.o zlatm4.o zlctes.o zlctsx.o zlsets.o zsbmv.o \
   zsgt01.o zslect.o \
   zsgt01.o zslect.o zsyl01.o\
   zstt21.o zstt22.o zunt01.o zunt03.o
 .PHONY: all
--- a/lapack-netlib/TESTING/EIG/cchkec.f
+++ b/lapack-netlib/TESTING/EIG/cchkec.f
@@ -23,7 +23,7 @@
 *> \verbatim
 *>
 *> CCHKEC tests eigen- condition estimation routines
 *>        CTRSYL, CTREXC, CTRSNA, CTRSEN
 *>        CTRSYL, CTRSYL3, CTREXC, CTRSNA, CTRSEN
 *>
 *> In all cases, the routine runs through a fixed set of numerical
 *> examples, subjects them to various tests, and compares the test
@@ -88,17 +88,17 @@
 *     .. Local Scalars ..
      LOGICAL            OK
      CHARACTER*3        PATH
      INTEGER            KTREXC, KTRSEN, KTRSNA, KTRSYL, LTREXC, LTRSYL,
     $                   NTESTS, NTREXC, NTRSYL
      REAL               EPS, RTREXC, RTRSYL, SFMIN
      INTEGER            KTREXC, KTRSEN, KTRSNA, KTRSYL, KTRSYL3,
     $                   LTREXC, LTRSYL, NTESTS, NTREXC, NTRSYL
      REAL               EPS, RTREXC, SFMIN
 *     ..
 *     .. Local Arrays ..
      INTEGER            LTRSEN( 3 ), LTRSNA( 3 ), NTRSEN( 3 ),
     $                   NTRSNA( 3 )
      REAL               RTRSEN( 3 ), RTRSNA( 3 )
      INTEGER            FTRSYL( 3 ), ITRSYL( 2 ), LTRSEN( 3 ),
     $                   LTRSNA( 3 ), NTRSEN( 3 ), NTRSNA( 3 )
      REAL               RTRSEN( 3 ), RTRSNA( 3 ), RTRSYL( 2 )
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           CERREC, CGET35, CGET36, CGET37, CGET38
      EXTERNAL           CERREC, CGET35, CGET36, CGET37, CGET38, CSYL01
 *     ..
 *     .. External Functions ..
      REAL               SLAMCH
@@ -120,10 +120,24 @@
     $   CALL CERREC( PATH, NOUT )
 *
      OK = .TRUE.
      CALL CGET35( RTRSYL, LTRSYL, NTRSYL, KTRSYL, NIN )
      IF( RTRSYL.GT.THRESH ) THEN
      CALL CGET35( RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL, NIN )
      IF( RTRSYL( 1 ).GT.THRESH ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9999 )RTRSYL, LTRSYL, NTRSYL, KTRSYL
         WRITE( NOUT, FMT = 9999 )RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL
      END IF
 *
      CALL CSYL01( THRESH, FTRSYL, RTRSYL, ITRSYL, KTRSYL3 )
      IF( FTRSYL( 1 ).GT.0 ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9970 )FTRSYL( 1 ), RTRSYL( 1 ), THRESH
      END IF
      IF( FTRSYL( 2 ).GT.0 ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9971 )FTRSYL( 2 ), RTRSYL( 2 ), THRESH
      END IF
      IF( FTRSYL( 3 ).GT.0 ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9972 )FTRSYL( 3 )
      END IF
 *
      CALL CGET36( RTREXC, LTREXC, NTREXC, KTREXC, NIN )
@@ -169,6 +183,12 @@
     $      / ' Safe minimum (SFMIN)             = ', E16.6, / )
 9992 FORMAT( ' Routines pass computational tests if test ratio is ',
     $      'less than', F8.2, / / )
 9972 FORMAT( 'CTRSYL and CTRSYL3 compute an inconsistent scale ',
     $      'factor in ', I8, ' tests.')
 9971 FORMAT( 'Error in CTRSYL3: ', I8, ' tests fail the threshold.', /
     $      'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
 9970 FORMAT( 'Error in CTRSYL: ', I8, ' tests fail the threshold.', /
     $      'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
      RETURN
 *
 *     End of CCHKEC
--- a/lapack-netlib/TESTING/EIG/cerrec.f
+++ b/lapack-netlib/TESTING/EIG/cerrec.f
@@ -23,7 +23,7 @@
 *>
 *> CERREC tests the error exits for the routines for eigen- condition
 *> estimation for REAL matrices:
 *>    CTRSYL, CTREXC, CTRSNA and CTRSEN.
 *>    CTRSYL, CTRSYL3, CTREXC, CTRSNA and CTRSEN.
 *> \endverbatim
 *
 *  Arguments:
@@ -77,12 +77,12 @@
 *     ..
 *     .. Local Arrays ..
      LOGICAL            SEL( NMAX )
      REAL               RW( LW ), S( NMAX ), SEP( NMAX )
      REAL               RW( LW ), S( NMAX ), SEP( NMAX ), SWORK( NMAX )
      COMPLEX            A( NMAX, NMAX ), B( NMAX, NMAX ),
     $                   C( NMAX, NMAX ), WORK( LW ), X( NMAX )
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           CHKXER, CTREXC, CTRSEN, CTRSNA, CTRSYL
      EXTERNAL           CHKXER, CTREXC, CTRSEN, CTRSNA, CTRSYL, CTRSYL3
 *     ..
 *     .. Scalars in Common ..
      LOGICAL            LERR, OK
@@ -141,6 +141,43 @@
      CALL CHKXER( 'CTRSYL', INFOT, NOUT, LERR, OK )
      NT = NT + 8
 *
 *     Test CTRSYL3
 *
      SRNAMT = 'CTRSYL3'
      INFOT = 1
      CALL CTRSYL3( 'X', 'N', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 2
      CALL CTRSYL3( 'N', 'X', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 3
      CALL CTRSYL3( 'N', 'N', 0, 0, 0, A, 1, B, 1, C, 1, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 4
      CALL CTRSYL3( 'N', 'N', 1, -1, 0, A, 1, B, 1, C, 1, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 5
      CALL CTRSYL3( 'N', 'N', 1, 0, -1, A, 1, B, 1, C, 1, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 7
      CALL CTRSYL3( 'N', 'N', 1, 2, 0, A, 1, B, 1, C, 2, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 9
      CALL CTRSYL3( 'N', 'N', 1, 0, 2, A, 1, B, 1, C, 1, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 11
      CALL CTRSYL3( 'N', 'N', 1, 2, 0, A, 2, B, 1, C, 1, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'CTRSYL3', INFOT, NOUT, LERR, OK )
      NT = NT + 8
 *
 *     Test CTREXC
 *
      SRNAMT = 'CTREXC'
--- a/lapack-netlib/TESTING/EIG/csyl01.f
+++ b/lapack-netlib/TESTING/EIG/csyl01.f
@@ -0,0 +1,294 @@
 *> \brief \b CSYL01
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE CSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
 *
 *     .. Scalar Arguments ..
 *     INTEGER            KNT
 *     REAL               THRESH
 *     ..
 *     .. Array Arguments ..
 *     INTEGER            NFAIL( 3 ), NINFO( 2 )
 *     REAL               RMAX( 2 )
 *     ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> CSYL01 tests CTRSYL and CTRSYL3, routines for solving the Sylvester matrix
 *> equation
 *>
 *>    op(A)*X + ISGN*X*op(B) = scale*C,
 *>
 *> where op(A) and op(B) are both upper triangular form, op() represents an
 *> optional conjugate transpose, and ISGN can be -1 or +1. Scale is an output
 *> less than or equal to 1, chosen to avoid overflow in X.
 *>
 *> The test code verifies that the following residual does not exceed
 *> the provided threshold:
 *>
 *>    norm(op(A)*X + ISGN*X*op(B) - scale*C) /
 *>        (EPS*max(norm(A),norm(B))*norm(X))
 *>
 *> This routine complements CGET35 by testing with larger,
 *> random matrices, of which some require rescaling of X to avoid overflow.
 *>
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] THRESH
 *> \verbatim
 *>          THRESH is REAL
 *>          A test will count as "failed" if the residual, computed as
 *>          described above, exceeds THRESH.
 *> \endverbatim
 *>
 *> \param[out] NFAIL
 *> \verbatim
 *>          NFAIL is INTEGER array, dimension (3)
 *>          NFAIL(1) = No. of times residual CTRSYL exceeds threshold THRESH
 *>          NFAIL(2) = No. of times residual CTRSYL3 exceeds threshold THRESH
 *>          NFAIL(3) = No. of times CTRSYL3 and CTRSYL deviate
 *> \endverbatim
 *>
 *> \param[out] RMAX
 *> \verbatim
 *>          RMAX is DOUBLE PRECISION array, dimension (2)
 *>          RMAX(1) = Value of the largest test ratio of CTRSYL
 *>          RMAX(2) = Value of the largest test ratio of CTRSYL3
 *> \endverbatim
 *>
 *> \param[out] NINFO
 *> \verbatim
 *>          NINFO is INTEGER array, dimension (2)
 *>          NINFO(1) = No. of times CTRSYL where INFO is nonzero
 *>          NINFO(2) = No. of times CTRSYL3 where INFO is nonzero
 *> \endverbatim
 *>
 *> \param[out] KNT
 *> \verbatim
 *>          KNT is INTEGER
 *>          Total number of examples tested.
 *> \endverbatim
 *
 *  -- LAPACK test routine --
      SUBROUTINE CSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
      IMPLICIT NONE
 *
 *  -- LAPACK test routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            KNT
      REAL               THRESH
 *     ..
 *     .. Array Arguments ..
      INTEGER            NFAIL( 3 ), NINFO( 2 )
      REAL               RMAX( 2 )
 *     ..
 *
 *  =====================================================================
 *     ..
 *     .. Parameters ..
      COMPLEX            CONE
      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
      REAL               ONE, ZERO
      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
      INTEGER            MAXM, MAXN, LDSWORK
      PARAMETER          ( MAXM = 101, MAXN = 138, LDSWORK = 18 )
 *     ..
 *     .. Local Scalars ..
      CHARACTER          TRANA, TRANB
      INTEGER            I, INFO, IINFO, ISGN, ITRANA, ITRANB, J, KLA,
     $                   KUA, KLB, KUB, M, N
      REAL               ANRM, BNRM, BIGNUM, EPS, RES, RES1,
     $                   SCALE, SCALE3, SMLNUM, TNRM, XNRM
      COMPLEX            RMUL
 *     ..
 *     .. Local Arrays ..
      COMPLEX            A( MAXM, MAXM ), B( MAXN, MAXN ),
     $                   C( MAXM, MAXN ), CC( MAXM, MAXN ),
     $                   X( MAXM, MAXN ),
     $                   DUML( MAXM ), DUMR( MAXN ),
     $                   D( MIN( MAXM, MAXN ) )
      REAL               SWORK( LDSWORK, 54 ), DUM( MAXN ), VM( 2 )
      INTEGER            ISEED( 4 ), IWORK( MAXM + MAXN + 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            SISNAN
      REAL               SLAMCH, CLANGE
      EXTERNAL           SISNAN, SLAMCH, CLANGE
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           CLATMR, CLACPY, CGEMM, CTRSYL, CTRSYL3
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, REAL, MAX
 *     ..
 *     .. Executable Statements ..
 *
 *     Get machine parameters
 *
      EPS = SLAMCH( 'P' )
      SMLNUM = SLAMCH( 'S' ) / EPS
      BIGNUM = ONE / SMLNUM
 *
 *     Expect INFO = 0
      VM( 1 ) = ONE
 *     Expect INFO = 1
      VM( 2 ) = 0.5E+0
 *
 *     Begin test loop
 *
      NINFO( 1 ) = 0
      NINFO( 2 ) = 0
      NFAIL( 1 ) = 0
      NFAIL( 2 ) = 0
      NFAIL( 3 ) = 0
      RMAX( 1 ) = ZERO
      RMAX( 2 ) = ZERO
      KNT = 0
      ISEED( 1 ) = 1
      ISEED( 2 ) = 1
      ISEED( 3 ) = 1
      ISEED( 4 ) = 1
      SCALE = ONE
      SCALE3 = ONE
      DO J = 1, 2
         DO ISGN = -1, 1, 2
 *           Reset seed (overwritten by LATMR)
            ISEED( 1 ) = 1
            ISEED( 2 ) = 1
            ISEED( 3 ) = 1
            ISEED( 4 ) = 1
            DO M = 32, MAXM, 23
               KLA = 0
               KUA = M - 1
               CALL CLATMR( M, M, 'S', ISEED, 'N', D,
     $                      6, ONE, CONE, 'T', 'N',
     $                      DUML, 1, ONE, DUMR, 1, ONE,
     $                      'N', IWORK, KLA, KUA, ZERO,
     $                      ONE, 'NO', A, MAXM, IWORK,
     $                      IINFO )
               DO I = 1, M
                  A( I, I ) = A( I, I ) * VM( J )
               END DO
               ANRM = CLANGE( 'M', M, M, A, MAXM, DUM )
               DO N = 51, MAXN, 29
                  KLB = 0
                  KUB = N - 1
                  CALL CLATMR( N, N, 'S', ISEED, 'N', D,
     $                         6, ONE, CONE, 'T', 'N',
     $                         DUML, 1, ONE, DUMR, 1, ONE,
     $                         'N', IWORK, KLB, KUB, ZERO,
     $                         ONE, 'NO', B, MAXN, IWORK,
     $                         IINFO )
                  DO I = 1, N
                     B( I, I ) = B( I, I ) * VM ( J )
                  END DO
                  BNRM = CLANGE( 'M', N, N, B, MAXN, DUM )
                  TNRM = MAX( ANRM, BNRM )
                  CALL CLATMR( M, N, 'S', ISEED, 'N', D,
     $                         6, ONE, CONE, 'T', 'N',
     $                         DUML, 1, ONE, DUMR, 1, ONE,
     $                         'N', IWORK, M, N, ZERO, ONE,
     $                         'NO', C, MAXM, IWORK, IINFO )
                  DO ITRANA = 1, 2
                     IF( ITRANA.EQ.1 )
     $                   TRANA = 'N'
                     IF( ITRANA.EQ.2 )
     $                   TRANA = 'C'
                     DO ITRANB = 1, 2
                        IF( ITRANB.EQ.1 )
     $                     TRANB = 'N'
                        IF( ITRANB.EQ.2 )
     $                     TRANB = 'C'
                        KNT = KNT + 1
 *
                        CALL CLACPY( 'All', M, N, C, MAXM, X, MAXM)
                        CALL CLACPY( 'All', M, N, C, MAXM, CC, MAXM)
                        CALL CTRSYL( TRANA, TRANB, ISGN, M, N, 
     $                               A, MAXM, B, MAXN, X, MAXM,
     $                               SCALE, IINFO )
                        IF( IINFO.NE.0 )
     $                     NINFO( 1 ) = NINFO( 1 ) + 1
                        XNRM = CLANGE( 'M', M, N, X, MAXM, DUM )
                        RMUL = CONE
                        IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
                           IF( XNRM.GT.BIGNUM / TNRM ) THEN
                              RMUL = CONE / MAX( XNRM, TNRM )
                           END IF
                        END IF
                        CALL CGEMM( TRANA, 'N', M, N, M, RMUL,
     $                              A, MAXM, X, MAXM, -SCALE*RMUL,
     $                              CC, MAXM )
                        CALL CGEMM( 'N', TRANB, M, N, N,
     $                              REAL( ISGN )*RMUL, X, MAXM, B,
     $                              MAXN, CONE, CC, MAXM )
                        RES1 = CLANGE( 'M', M, N, CC, MAXM, DUM )
                        RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
     $                        ( ( ABS( RMUL )*TNRM )*EPS )*XNRM )
                        IF( RES.GT.THRESH )
     $                     NFAIL( 1 ) = NFAIL( 1 ) + 1
                        IF( RES.GT.RMAX( 1 ) )
     $                     RMAX( 1 ) = RES
 *
                        CALL CLACPY( 'All', M, N, C, MAXM, X, MAXM )
                        CALL CLACPY( 'All', M, N, C, MAXM, CC, MAXM )
                        CALL CTRSYL3( TRANA, TRANB, ISGN, M, N,
     $                                A, MAXM, B, MAXN, X, MAXM,
     $                                SCALE3, SWORK, LDSWORK, INFO)
                        IF( INFO.NE.0 )
     $                     NINFO( 2 ) = NINFO( 2 ) + 1
                        XNRM = CLANGE( 'M', M, N, X, MAXM, DUM )
                        RMUL = CONE
                        IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
                           IF( XNRM.GT.BIGNUM / TNRM ) THEN
                              RMUL = CONE / MAX( XNRM, TNRM )
                           END IF
                        END IF
                        CALL CGEMM( TRANA, 'N', M, N, M, RMUL,
     $                              A, MAXM, X, MAXM, -SCALE3*RMUL,
     $                              CC, MAXM )
                        CALL CGEMM( 'N', TRANB, M, N, N,
     $                              REAL( ISGN )*RMUL, X, MAXM, B,
     $                              MAXN, CONE, CC, MAXM )
                        RES1 = CLANGE( 'M', M, N, CC, MAXM, DUM )
                        RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
     $                             ( ( ABS( RMUL )*TNRM )*EPS )*XNRM )
 *                       Verify that TRSYL3 only flushes if TRSYL flushes (but
 *                       there may be cases where TRSYL3 avoid flushing).
                        IF( SCALE3.EQ.ZERO .AND. SCALE.GT.ZERO .OR. 
     $                      IINFO.NE.INFO ) THEN
                           NFAIL( 3 ) = NFAIL( 3 ) + 1
                        END IF
                        IF( RES.GT.THRESH .OR. SISNAN( RES ) )
     $                     NFAIL( 2 ) = NFAIL( 2 ) + 1
                        IF( RES.GT.RMAX( 2 ) )
     $                     RMAX( 2 ) = RES
                     END DO
                  END DO
               END DO
            END DO
         END DO
      END DO
 *
      RETURN
 *
 *     End of CSYL01
 *
      END
--- a/lapack-netlib/TESTING/EIG/dchkec.f
+++ b/lapack-netlib/TESTING/EIG/dchkec.f
@@ -90,21 +90,23 @@
      LOGICAL            OK
      CHARACTER*3        PATH
      INTEGER            KLAEXC, KLALN2, KLANV2, KLAQTR, KLASY2, KTREXC,
     $                   KTRSEN, KTRSNA, KTRSYL, LLAEXC, LLALN2, LLANV2,
     $                   LLAQTR, LLASY2, LTREXC, LTRSYL, NLANV2, NLAQTR,
     $                   NLASY2, NTESTS, NTRSYL, KTGEXC, NTGEXC, LTGEXC
     $                   KTRSEN, KTRSNA, KTRSYL, KTRSYL3, LLAEXC,
     $                   LLALN2, LLANV2, LLAQTR, LLASY2, LTREXC, LTRSYL,
     $                   NLANV2, NLAQTR, NLASY2, NTESTS, NTRSYL, KTGEXC,
     $                   LTGEXC
      DOUBLE PRECISION   EPS, RLAEXC, RLALN2, RLANV2, RLAQTR, RLASY2,
     $                   RTREXC, RTRSYL, SFMIN, RTGEXC
     $                   RTREXC, SFMIN, RTGEXC
 *     ..
 *     .. Local Arrays ..
      INTEGER            LTRSEN( 3 ), LTRSNA( 3 ), NLAEXC( 2 ),
     $                   NLALN2( 2 ), NTREXC( 3 ), NTRSEN( 3 ),
      INTEGER            FTRSYL( 3 ), ITRSYL( 2 ), LTRSEN( 3 ),
     $                   LTRSNA( 3 ), NLAEXC( 2 ), NLALN2( 2 ),
     $                   NTGEXC( 2 ), NTREXC( 3 ), NTRSEN( 3 ),
     $                   NTRSNA( 3 )
      DOUBLE PRECISION   RTRSEN( 3 ), RTRSNA( 3 )
      DOUBLE PRECISION   RTRSEN( 3 ), RTRSNA( 3 ), RTRSYL( 2 )
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DERREC, DGET31, DGET32, DGET33, DGET34, DGET35,
     $                   DGET36, DGET37, DGET38, DGET39, DGET40
     $                   DGET36, DGET37, DGET38, DGET39, DGET40, DSYL01
 *     ..
 *     .. External Functions ..
      DOUBLE PRECISION   DLAMCH
@@ -153,10 +155,24 @@
         WRITE( NOUT, FMT = 9996 )RLAEXC, LLAEXC, NLAEXC, KLAEXC
      END IF
 *
      CALL DGET35( RTRSYL, LTRSYL, NTRSYL, KTRSYL )
      IF( RTRSYL.GT.THRESH ) THEN
      CALL DGET35( RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL )
      IF( RTRSYL( 1 ).GT.THRESH ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9995 )RTRSYL, LTRSYL, NTRSYL, KTRSYL
         WRITE( NOUT, FMT = 9995 )RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL
      END IF
 *
      CALL DSYL01( THRESH, FTRSYL, RTRSYL, ITRSYL, KTRSYL3 )
      IF( FTRSYL( 1 ).GT.0 ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9970 )FTRSYL( 1 ), RTRSYL( 1 ), THRESH
      END IF
      IF( FTRSYL( 2 ).GT.0 ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9971 )FTRSYL( 2 ), RTRSYL( 2 ), THRESH
      END IF
      IF( FTRSYL( 3 ).GT.0 ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9972 )FTRSYL( 3 )
      END IF
 *
      CALL DGET36( RTREXC, LTREXC, NTREXC, KTREXC, NIN )
@@ -227,7 +243,13 @@
 9987 FORMAT( ' Routines pass computational tests if test ratio is les',
     $      's than', F8.2, / / )
 9986 FORMAT( ' Error in DTGEXC: RMAX =', D12.3, / ' LMAX = ', I8, ' N',
     $      'INFO=', I8, ' KNT=', I8 )
     $      'INFO=', 2I8, ' KNT=', I8 )
 9972 FORMAT( 'DTRSYL and DTRSYL3 compute an inconsistent result ',
     $      'factor in ', I8, ' tests.')
 9971 FORMAT( 'Error in DTRSYL3: ', I8, ' tests fail the threshold.', /
     $      'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
 9970 FORMAT( 'Error in DTRSYL: ', I8, ' tests fail the threshold.', /
     $      'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
 *
 *     End of DCHKEC
 *
--- a/lapack-netlib/TESTING/EIG/derrec.f
+++ b/lapack-netlib/TESTING/EIG/derrec.f
@@ -23,7 +23,7 @@
 *>
 *> DERREC tests the error exits for the routines for eigen- condition
 *> estimation for DOUBLE PRECISION matrices:
 *>    DTRSYL, DTREXC, DTRSNA and DTRSEN.
 *>    DTRSYL, DTRSYL3, DTREXC, DTRSNA and DTRSEN.
 *> \endverbatim
 *
 *  Arguments:
@@ -82,7 +82,7 @@
     $                   WI( NMAX ), WORK( NMAX ), WR( NMAX )
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           CHKXER, DTREXC, DTRSEN, DTRSNA, DTRSYL
      EXTERNAL           CHKXER, DTREXC, DTRSEN, DTRSNA, DTRSYL, DTRSYL3
 *     ..
 *     .. Scalars in Common ..
      LOGICAL            LERR, OK
@@ -141,6 +141,43 @@
      CALL CHKXER( 'DTRSYL', INFOT, NOUT, LERR, OK )
      NT = NT + 8
 *
 *     Test DTRSYL3
 *
      SRNAMT = 'DTRSYL3'
      INFOT = 1
      CALL DTRSYL3( 'X', 'N', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 2
      CALL DTRSYL3( 'N', 'X', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 3
      CALL DTRSYL3( 'N', 'N', 0, 0, 0, A, 1, B, 1, C, 1, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 4
      CALL DTRSYL3( 'N', 'N', 1, -1, 0, A, 1, B, 1, C, 1, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 5
      CALL DTRSYL3( 'N', 'N', 1, 0, -1, A, 1, B, 1, C, 1, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 7
      CALL DTRSYL3( 'N', 'N', 1, 2, 0, A, 1, B, 1, C, 2, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 9
      CALL DTRSYL3( 'N', 'N', 1, 0, 2, A, 1, B, 1, C, 1, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 11
      CALL DTRSYL3( 'N', 'N', 1, 2, 0, A, 2, B, 1, C, 1, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'DTRSYL3', INFOT, NOUT, LERR, OK )
      NT = NT + 8
 *
 *     Test DTREXC
 *
      SRNAMT = 'DTREXC'
--- a/lapack-netlib/TESTING/EIG/dsyl01.f
+++ b/lapack-netlib/TESTING/EIG/dsyl01.f
@@ -0,0 +1,288 @@
 *> \brief \b DSYL01
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            KNT
 *       DOUBLE PRECISION   THRESH
 *       ..
 *       .. Array Arguments ..
 *       INTEGER            NFAIL( 3 ), NINFO( 2 )
 *       DOUBLE PRECISION   RMAX( 2 )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DSYL01 tests DTRSYL and DTRSYL3, routines for solving the Sylvester matrix
 *> equation
 *>
 *>    op(A)*X + ISGN*X*op(B) = scale*C,
 *>
 *> A and B are assumed to be in Schur canonical form, op() represents an
 *> optional transpose, and ISGN can be -1 or +1.  Scale is an output
 *> less than or equal to 1, chosen to avoid overflow in X.
 *>
 *> The test code verifies that the following residual does not exceed
 *> the provided threshold:
 *>
 *>    norm(op(A)*X + ISGN*X*op(B) - scale*C) /
 *>        (EPS*max(norm(A),norm(B))*norm(X))
 *>
 *> This routine complements DGET35 by testing with larger,
 *> random matrices, of which some require rescaling of X to avoid overflow.
 *>
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] THRESH
 *> \verbatim
 *>          THRESH is DOUBLE PRECISION
 *>          A test will count as "failed" if the residual, computed as
 *>          described above, exceeds THRESH.
 *> \endverbatim
 *>
 *> \param[out] NFAIL
 *> \verbatim
 *>          NFAIL is INTEGER array, dimension (3)
 *>          NFAIL(1) = No. of times residual DTRSYL exceeds threshold THRESH
 *>          NFAIL(2) = No. of times residual DTRSYL3 exceeds threshold THRESH
 *>          NFAIL(3) = No. of times DTRSYL3 and DTRSYL deviate
 *> \endverbatim
 *>
 *> \param[out] RMAX
 *> \verbatim
 *>          RMAX is DOUBLE PRECISION, dimension (2)
 *>          RMAX(1) = Value of the largest test ratio of DTRSYL
 *>          RMAX(2) = Value of the largest test ratio of DTRSYL3
 *> \endverbatim
 *>
 *> \param[out] NINFO
 *> \verbatim
 *>          NINFO is INTEGER array, dimension (2)
 *>          NINFO(1) = No. of times DTRSYL returns an expected INFO
 *>          NINFO(2) = No. of times DTRSYL3 returns an expected INFO
 *> \endverbatim
 *>
 *> \param[out] KNT
 *> \verbatim
 *>          KNT is INTEGER
 *>          Total number of examples tested.
 *> \endverbatim
 *
 *  -- LAPACK test routine --
      SUBROUTINE DSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
      IMPLICIT NONE
 *
 *  -- LAPACK test routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            KNT
      DOUBLE PRECISION   THRESH
 *     ..
 *     .. Array Arguments ..
      INTEGER            NFAIL( 3 ), NINFO( 2 )
      DOUBLE PRECISION   RMAX( 2 )
 *     ..
 *
 *  =====================================================================
 *     ..
 *     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE
      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
      INTEGER            MAXM, MAXN, LDSWORK
      PARAMETER          ( MAXM = 245, MAXN = 192, LDSWORK = 36 )
 *     ..
 *     .. Local Scalars ..
      CHARACTER          TRANA, TRANB
      INTEGER            I, INFO, IINFO, ISGN, ITRANA, ITRANB, J, KLA,
     $                   KUA, KLB, KUB, LIWORK, M, N
      DOUBLE PRECISION   ANRM, BNRM, BIGNUM, EPS, RES, RES1, RMUL,
     $                   SCALE, SCALE3, SMLNUM, TNRM, XNRM
 *     ..
 *     .. Local Arrays ..
      DOUBLE PRECISION   A( MAXM, MAXM ), B( MAXN, MAXN ),
     $                   C( MAXM, MAXN ), CC( MAXM, MAXN ),
     $                   X( MAXM, MAXN ),
     $                   DUML( MAXM ), DUMR( MAXN ),
     $                   D( MAX( MAXM, MAXN ) ), DUM( MAXN ),
     $                   SWORK( LDSWORK, 126 ), VM( 2 )
      INTEGER            ISEED( 4 ), IWORK( MAXM + MAXN + 2 ), IDUM( 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            DISNAN
      DOUBLE PRECISION   DLAMCH, DLANGE
      EXTERNAL           DLAMCH, DLANGE
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLATMR, DLACPY, DGEMM, DTRSYL, DTRSYL3
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, DBLE, MAX
 *     ..
 *     .. Executable Statements ..
 *
 *     Get machine parameters
 *
      EPS = DLAMCH( 'P' )
      SMLNUM = DLAMCH( 'S' ) / EPS
      BIGNUM = ONE / SMLNUM
 *
      VM( 1 ) = ONE
      VM( 2 ) = 0.000001D+0
 *
 *     Begin test loop
 *
      NINFO( 1 ) = 0
      NINFO( 2 ) = 0
      NFAIL( 1 ) = 0
      NFAIL( 2 ) = 0
      NFAIL( 3 ) = 0
      RMAX( 1 ) = ZERO
      RMAX( 2 ) = ZERO
      KNT = 0
      DO I = 1, 4
         ISEED( I ) = 1
      END DO
      SCALE = ONE
      SCALE3 = ONE
      LIWORK = MAXM + MAXN + 2
      DO J = 1, 2
         DO ISGN = -1, 1, 2
 *           Reset seed (overwritten by LATMR)
            DO I = 1, 4
               ISEED( I ) = 1
            END DO
            DO M = 32, MAXM, 71
               KLA = 0
               KUA = M - 1
               CALL DLATMR( M, M, 'S', ISEED, 'N', D,
     $                      6, ONE, ONE, 'T', 'N',
     $                      DUML, 1, ONE, DUMR, 1, ONE,
     $                      'N', IWORK, KLA, KUA, ZERO,
     $                      ONE, 'NO', A, MAXM, IWORK, IINFO )
               DO I = 1, M
                  A( I, I ) = A( I, I ) * VM( J )
               END DO
               ANRM = DLANGE( 'M', M, M, A, MAXM, DUM )
               DO N = 51, MAXN, 47
                  KLB = 0
                  KUB = N - 1
                  CALL DLATMR( N, N, 'S', ISEED, 'N', D,
     $                         6, ONE, ONE, 'T', 'N',
     $                         DUML, 1, ONE, DUMR, 1, ONE,
     $                         'N', IWORK, KLB, KUB, ZERO,
     $                         ONE, 'NO', B, MAXN, IWORK, IINFO )
                  BNRM = DLANGE( 'M', N, N, B, MAXN, DUM )
                  TNRM = MAX( ANRM, BNRM )
                  CALL DLATMR( M, N, 'S', ISEED, 'N', D,
     $                         6, ONE, ONE, 'T', 'N',
     $                         DUML, 1, ONE, DUMR, 1, ONE,
     $                         'N', IWORK, M, N, ZERO, ONE,
     $                         'NO', C, MAXM, IWORK, IINFO )
                  DO ITRANA = 1, 2
                     IF( ITRANA.EQ.1 ) THEN
                        TRANA = 'N'
                     END IF
                     IF( ITRANA.EQ.2 ) THEN
                        TRANA = 'T'
                     END IF
                     DO ITRANB = 1, 2
                        IF( ITRANB.EQ.1 ) THEN
                           TRANB = 'N'
                        END IF
                        IF( ITRANB.EQ.2 ) THEN
                           TRANB = 'T'
                        END IF
                        KNT = KNT + 1
 *
                        CALL DLACPY( 'All', M, N, C, MAXM, X, MAXM)
                        CALL DLACPY( 'All', M, N, C, MAXM, CC, MAXM)
                        CALL DTRSYL( TRANA, TRANB, ISGN, M, N, 
     $                               A, MAXM, B, MAXN, X, MAXM,
     $                               SCALE, IINFO )
                        IF( IINFO.NE.0 )
     $                     NINFO( 1 ) = NINFO( 1 ) + 1
                        XNRM = DLANGE( 'M', M, N, X, MAXM, DUM )
                        RMUL = ONE
                        IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
                           IF( XNRM.GT.BIGNUM / TNRM ) THEN
                              RMUL = ONE / MAX( XNRM, TNRM )
                           END IF
                        END IF
                        CALL DGEMM( TRANA, 'N', M, N, M, RMUL,
     $                              A, MAXM, X, MAXM, -SCALE*RMUL,
     $                              CC, MAXM )
                        CALL DGEMM( 'N', TRANB, M, N, N,
     $                               DBLE( ISGN )*RMUL, X, MAXM, B,
     $                               MAXN, ONE, CC, MAXM )
                        RES1 = DLANGE( 'M', M, N, CC, MAXM, DUM )
                        RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
     $                              ( ( RMUL*TNRM )*EPS )*XNRM )
                        IF( RES.GT.THRESH )
     $                     NFAIL( 1 ) = NFAIL( 1 ) + 1
                        IF( RES.GT.RMAX( 1 ) )
     $                     RMAX( 1 ) = RES
 *
                        CALL DLACPY( 'All', M, N, C, MAXM, X, MAXM )
                        CALL DLACPY( 'All', M, N, C, MAXM, CC, MAXM )
                        CALL DTRSYL3( TRANA, TRANB, ISGN, M, N,
     $                                A, MAXM, B, MAXN, X, MAXM,
     $                                SCALE3, IWORK, LIWORK,
     $                                SWORK, LDSWORK, INFO)
                        IF( INFO.NE.0 )
     $                     NINFO( 2 ) = NINFO( 2 ) + 1
                        XNRM = DLANGE( 'M', M, N, X, MAXM, DUM )
                        RMUL = ONE
                        IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
                           IF( XNRM.GT.BIGNUM / TNRM ) THEN
                              RMUL = ONE / MAX( XNRM, TNRM )
                           END IF
                        END IF
                        CALL DGEMM( TRANA, 'N', M, N, M, RMUL,
     $                              A, MAXM, X, MAXM, -SCALE3*RMUL,
     $                              CC, MAXM )
                        CALL DGEMM( 'N', TRANB, M, N, N,
     $                              DBLE( ISGN )*RMUL, X, MAXM, B,
     $                              MAXN, ONE, CC, MAXM )
                        RES1 = DLANGE( 'M', M, N, CC, MAXM, DUM )
                        RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
     $                             ( ( RMUL*TNRM )*EPS )*XNRM )
 *                       Verify that TRSYL3 only flushes if TRSYL flushes (but
 *                       there may be cases where TRSYL3 avoid flushing).
                        IF( SCALE3.EQ.ZERO .AND. SCALE.GT.ZERO .OR. 
     $                      IINFO.NE.INFO ) THEN
                           NFAIL( 3 ) = NFAIL( 3 ) + 1
                        END IF
                        IF( RES.GT.THRESH .OR. DISNAN( RES ) )
     $                     NFAIL( 2 ) = NFAIL( 2 ) + 1
                        IF( RES.GT.RMAX( 2 ) )
     $                     RMAX( 2 ) = RES
                     END DO
                  END DO
               END DO
            END DO
         END DO
      END DO
 *
      RETURN
 *
 *     End of DSYL01
 *
      END
--- a/lapack-netlib/TESTING/EIG/schkec.f
+++ b/lapack-netlib/TESTING/EIG/schkec.f
@@ -90,21 +90,23 @@
      LOGICAL            OK
      CHARACTER*3        PATH
      INTEGER            KLAEXC, KLALN2, KLANV2, KLAQTR, KLASY2, KTREXC,
     $                   KTRSEN, KTRSNA, KTRSYL, LLAEXC, LLALN2, LLANV2,
     $                   LLAQTR, LLASY2, LTREXC, LTRSYL, NLANV2, NLAQTR,
     $                   NLASY2, NTESTS, NTRSYL, KTGEXC, NTGEXC, LTGEXC
     $                   KTRSEN, KTRSNA, KTRSYL, KTRSYL3, LLAEXC,
     $                   LLALN2, LLANV2, LLAQTR, LLASY2, LTREXC, LTRSYL,
     $                   NLANV2, NLAQTR, NLASY2, NTESTS, NTRSYL, KTGEXC,
     $                   LTGEXC
      REAL               EPS, RLAEXC, RLALN2, RLANV2, RLAQTR, RLASY2,
     $                   RTREXC, RTRSYL, SFMIN, RTGEXC
     $                   RTREXC, SFMIN, RTGEXC
 *     ..
 *     .. Local Arrays ..
      INTEGER            LTRSEN( 3 ), LTRSNA( 3 ), NLAEXC( 2 ),
     $                   NLALN2( 2 ), NTREXC( 3 ), NTRSEN( 3 ),
      INTEGER            FTRSYL( 3 ), ITRSYL( 2 ), LTRSEN( 3 ),
     $                   LTRSNA( 3 ), NLAEXC( 2 ), NLALN2( 2 ),
     $                   NTGEXC( 2 ), NTREXC( 3 ), NTRSEN( 3 ),
     $                   NTRSNA( 3 )
      REAL               RTRSEN( 3 ), RTRSNA( 3 )
      REAL               RTRSEN( 3 ), RTRSNA( 3 ), RTRSYL( 2 )
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           SERREC, SGET31, SGET32, SGET33, SGET34, SGET35,
     $                   SGET36, SGET37, SGET38, SGET39, SGET40
     $                   SGET36, SGET37, SGET38, SGET39, SGET40, SSYL01
 *     ..
 *     .. External Functions ..
      REAL               SLAMCH
@@ -153,10 +155,24 @@
         WRITE( NOUT, FMT = 9996 )RLAEXC, LLAEXC, NLAEXC, KLAEXC
      END IF
 *
      CALL SGET35( RTRSYL, LTRSYL, NTRSYL, KTRSYL )
      IF( RTRSYL.GT.THRESH ) THEN
      CALL SGET35( RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL )
      IF( RTRSYL( 1 ).GT.THRESH ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9995 )RTRSYL, LTRSYL, NTRSYL, KTRSYL
         WRITE( NOUT, FMT = 9995 )RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL
      END IF
 *
      CALL SSYL01( THRESH, FTRSYL, RTRSYL, ITRSYL, KTRSYL3 )
      IF( FTRSYL( 1 ).GT.0 ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9970 )FTRSYL( 1 ), RTRSYL( 1 ), THRESH
      END IF
      IF( FTRSYL( 2 ).GT.0 ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9971 )FTRSYL( 2 ), RTRSYL( 2 ), THRESH
      END IF
      IF( FTRSYL( 3 ).GT.0 ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9972 )FTRSYL( 3 )
      END IF
 *
      CALL SGET36( RTREXC, LTREXC, NTREXC, KTREXC, NIN )
@@ -227,7 +243,13 @@
 9987 FORMAT( ' Routines pass computational tests if test ratio is les',
     $      's than', F8.2, / / )
 9986 FORMAT( ' Error in STGEXC: RMAX =', E12.3, / ' LMAX = ', I8, ' N',
     $      'INFO=', I8, ' KNT=', I8 )
     $      'INFO=', 2I8, ' KNT=', I8 )
 9972 FORMAT( 'STRSYL and STRSYL3 compute an inconsistent result ',
     $      'factor in ', I8, ' tests.')
 9971 FORMAT( 'Error in STRSYL3: ', I8, ' tests fail the threshold.', /
     $      'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
 9970 FORMAT( 'Error in STRSYL: ', I8, ' tests fail the threshold.', /
     $      'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
 *
 *     End of SCHKEC
 *
--- a/lapack-netlib/TESTING/EIG/serrec.f
+++ b/lapack-netlib/TESTING/EIG/serrec.f
@@ -23,7 +23,7 @@
 *>
 *> SERREC tests the error exits for the routines for eigen- condition
 *> estimation for REAL matrices:
 *>    STRSYL, STREXC, STRSNA and STRSEN.
 *>    STRSYL, STRSYL3, STREXC, STRSNA and STRSEN.
 *> \endverbatim
 *
 *  Arguments:
@@ -82,7 +82,7 @@
     $                   WI( NMAX ), WORK( NMAX ), WR( NMAX )
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           CHKXER, STREXC, STRSEN, STRSNA, STRSYL
      EXTERNAL           CHKXER, STREXC, STRSEN, STRSNA, STRSYL, STRSYL3
 *     ..
 *     .. Scalars in Common ..
      LOGICAL            LERR, OK
@@ -141,6 +141,43 @@
      CALL CHKXER( 'STRSYL', INFOT, NOUT, LERR, OK )
      NT = NT + 8
 *
 *     Test STRSYL3
 *
      SRNAMT = 'STRSYL3'
      INFOT = 1
      CALL STRSYL3( 'X', 'N', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 2
      CALL STRSYL3( 'N', 'X', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 3
      CALL STRSYL3( 'N', 'N', 0, 0, 0, A, 1, B, 1, C, 1, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 4
      CALL STRSYL3( 'N', 'N', 1, -1, 0, A, 1, B, 1, C, 1, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 5
      CALL STRSYL3( 'N', 'N', 1, 0, -1, A, 1, B, 1, C, 1, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 7
      CALL STRSYL3( 'N', 'N', 1, 2, 0, A, 1, B, 1, C, 2, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 9
      CALL STRSYL3( 'N', 'N', 1, 0, 2, A, 1, B, 1, C, 1, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 11
      CALL STRSYL3( 'N', 'N', 1, 2, 0, A, 2, B, 1, C, 1, SCALE,
     $              IWORK, NMAX, WORK, NMAX, INFO )
      CALL CHKXER( 'STRSYL3', INFOT, NOUT, LERR, OK )
      NT = NT + 8
 *
 *     Test STREXC
 *
      SRNAMT = 'STREXC'
--- a/lapack-netlib/TESTING/EIG/ssyl01.f
+++ b/lapack-netlib/TESTING/EIG/ssyl01.f
@@ -0,0 +1,288 @@
 *> \brief \b SSYL01
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE SSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
 *
 *      .. Scalar Arguments ..
 *      INTEGER            KNT
 *      REAL               THRESH
 *      ..
 *      .. Array Arguments ..
 *      INTEGER            NFAIL( 3 ), NINFO( 2 )
 *      REAL               RMAX( 2 )
 *      ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> SSYL01 tests STRSYL and STRSYL3, routines for solving the Sylvester matrix
 *> equation
 *>
 *>    op(A)*X + ISGN*X*op(B) = scale*C,
 *>
 *> A and B are assumed to be in Schur canonical form, op() represents an
 *> optional transpose, and ISGN can be -1 or +1.  Scale is an output
 *> less than or equal to 1, chosen to avoid overflow in X.
 *>
 *> The test code verifies that the following residual does not exceed
 *> the provided threshold:
 *>
 *>    norm(op(A)*X + ISGN*X*op(B) - scale*C) /
 *>        (EPS*max(norm(A),norm(B))*norm(X))
 *>
 *> This routine complements SGET35 by testing with larger,
 *> random matrices, of which some require rescaling of X to avoid overflow.
 *>
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] THRESH
 *> \verbatim
 *>          THRESH is REAL
 *>          A test will count as "failed" if the residual, computed as
 *>          described above, exceeds THRESH.
 *> \endverbatim
 *>
 *> \param[out] NFAIL
 *> \verbatim
 *>          NFAIL is INTEGER array, dimension (3)
 *>          NFAIL(1) = No. of times residual STRSYL exceeds threshold THRESH
 *>          NFAIL(2) = No. of times residual STRSYL3 exceeds threshold THRESH
 *>          NFAIL(3) = No. of times STRSYL3 and STRSYL deviate
 *> \endverbatim
 *>
 *> \param[out] RMAX
 *> \verbatim
 *>          RMAX is REAL, dimension (2)
 *>          RMAX(1) = Value of the largest test ratio of STRSYL
 *>          RMAX(2) = Value of the largest test ratio of STRSYL3
 *> \endverbatim
 *>
 *> \param[out] NINFO
 *> \verbatim
 *>          NINFO is INTEGER array, dimension (2)
 *>          NINFO(1) = No. of times STRSYL returns an expected INFO
 *>          NINFO(2) = No. of times STRSYL3 returns an expected INFO
 *> \endverbatim
 *>
 *> \param[out] KNT
 *> \verbatim
 *>          KNT is INTEGER
 *>          Total number of examples tested.
 *> \endverbatim
 *
 *  -- LAPACK test routine --
      SUBROUTINE SSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
      IMPLICIT NONE
 *
 *  -- LAPACK test routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            KNT
      REAL               THRESH
 *     ..
 *     .. Array Arguments ..
      INTEGER            NFAIL( 3 ), NINFO( 2 )
      REAL               RMAX( 2 )
 *     ..
 *
 *  =====================================================================
 *     ..
 *     .. Parameters ..
      REAL               ZERO, ONE
      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
      INTEGER            MAXM, MAXN, LDSWORK
      PARAMETER          ( MAXM = 101, MAXN = 138, LDSWORK = 18 )
 *     ..
 *     .. Local Scalars ..
      CHARACTER          TRANA, TRANB
      INTEGER            I, INFO, IINFO, ISGN, ITRANA, ITRANB, J, KLA,
     $                   KUA, KLB, KUB, LIWORK, M, N
      REAL               ANRM, BNRM, BIGNUM, EPS, RES, RES1, RMUL,
     $                   SCALE, SCALE3, SMLNUM, TNRM, XNRM
 *     ..
 *     .. Local Arrays ..
      REAL               A( MAXM, MAXM ), B( MAXN, MAXN ),
     $                   C( MAXM, MAXN ), CC( MAXM, MAXN ),
     $                   X( MAXM, MAXN ),
     $                   DUML( MAXM ), DUMR( MAXN ),
     $                   D( MAX( MAXM, MAXN ) ), DUM( MAXN ),
     $                   SWORK( LDSWORK, 54 ), VM( 2 )
      INTEGER            ISEED( 4 ), IWORK( MAXM + MAXN + 2 ), IDUM( 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            SISNAN
      REAL               SLAMCH, SLANGE
      EXTERNAL           SISNAN, SLAMCH, SLANGE
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           SLATMR, SLACPY, SGEMM, STRSYL, STRSYL3
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, REAL, MAX
 *     ..
 *     .. Executable Statements ..
 *
 *     Get machine parameters
 *
      EPS = SLAMCH( 'P' )
      SMLNUM = SLAMCH( 'S' ) / EPS
      BIGNUM = ONE / SMLNUM
 *
      VM( 1 ) = ONE
      VM( 2 ) = 0.05E+0
 *
 *     Begin test loop
 *
      NINFO( 1 ) = 0
      NINFO( 2 ) = 0
      NFAIL( 1 ) = 0
      NFAIL( 2 ) = 0
      NFAIL( 3 ) = 0
      RMAX( 1 ) = ZERO
      RMAX( 2 ) = ZERO
      KNT = 0
      DO I = 1, 4
         ISEED( I ) = 1
      END DO
      SCALE = ONE
      SCALE3 = ONE
      LIWORK = MAXM + MAXN + 2
      DO J = 1, 2
         DO ISGN = -1, 1, 2
 *           Reset seed (overwritten by LATMR)
            DO I = 1, 4
               ISEED( I ) = 1
            END DO
            DO M = 32, MAXM, 71
               KLA = 0
               KUA = M - 1
               CALL SLATMR( M, M, 'S', ISEED, 'N', D,
     $                      6, ONE, ONE, 'T', 'N',
     $                      DUML, 1, ONE, DUMR, 1, ONE,
     $                      'N', IWORK, KLA, KUA, ZERO,
     $                      ONE, 'NO', A, MAXM, IWORK, IINFO )
               DO I = 1, M
                  A( I, I ) = A( I, I ) * VM( J )
               END DO
               ANRM = SLANGE( 'M', M, M, A, MAXM, DUM )
               DO N = 51, MAXN, 47
                  KLB = 0
                  KUB = N - 1
                  CALL SLATMR( N, N, 'S', ISEED, 'N', D,
     $                         6, ONE, ONE, 'T', 'N',
     $                         DUML, 1, ONE, DUMR, 1, ONE,
     $                         'N', IWORK, KLB, KUB, ZERO,
     $                         ONE, 'NO', B, MAXN, IWORK, IINFO )
                  BNRM = SLANGE( 'M', N, N, B, MAXN, DUM )
                  TNRM = MAX( ANRM, BNRM )
                  CALL SLATMR( M, N, 'S', ISEED, 'N', D,
     $                         6, ONE, ONE, 'T', 'N',
     $                         DUML, 1, ONE, DUMR, 1, ONE,
     $                         'N', IWORK, M, N, ZERO, ONE,
     $                         'NO', C, MAXM, IWORK, IINFO )
                  DO ITRANA = 1, 2
                     IF( ITRANA.EQ.1 ) THEN
                        TRANA = 'N'
                     END IF
                     IF( ITRANA.EQ.2 ) THEN
                        TRANA = 'T'
                     END IF
                     DO ITRANB = 1, 2
                        IF( ITRANB.EQ.1 ) THEN
                           TRANB = 'N'
                        END IF
                        IF( ITRANB.EQ.2 ) THEN
                           TRANB = 'T'
                        END IF
                        KNT = KNT + 1
 *
                        CALL SLACPY( 'All', M, N, C, MAXM, X, MAXM)
                        CALL SLACPY( 'All', M, N, C, MAXM, CC, MAXM)
                        CALL STRSYL( TRANA, TRANB, ISGN, M, N, 
     $                               A, MAXM, B, MAXN, X, MAXM,
     $                               SCALE, IINFO )
                        IF( IINFO.NE.0 )
     $                     NINFO( 1 ) = NINFO( 1 ) + 1
                        XNRM = SLANGE( 'M', M, N, X, MAXM, DUM )
                        RMUL = ONE
                        IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
                           IF( XNRM.GT.BIGNUM / TNRM ) THEN
                              RMUL = ONE / MAX( XNRM, TNRM )
                           END IF
                        END IF
                        CALL SGEMM( TRANA, 'N', M, N, M, RMUL,
     $                              A, MAXM, X, MAXM, -SCALE*RMUL,
     $                              C, MAXM )
                        CALL SGEMM( 'N', TRANB, M, N, N,
     $                               REAL( ISGN )*RMUL, X, MAXM, B,
     $                               MAXN, ONE, C, MAXM )
                        RES1 = SLANGE( 'M', M, N, C, MAXM, DUM )
                        RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
     $                              ( ( RMUL*TNRM )*EPS )*XNRM )
                        IF( RES.GT.THRESH )
     $                     NFAIL( 1 ) = NFAIL( 1 ) + 1
                        IF( RES.GT.RMAX( 1 ) )
     $                     RMAX( 1 ) = RES
 *
                        CALL SLACPY( 'All', M, N, C, MAXM, X, MAXM )
                        CALL SLACPY( 'All', M, N, C, MAXM, CC, MAXM )
                        CALL STRSYL3( TRANA, TRANB, ISGN, M, N,
     $                                A, MAXM, B, MAXN, X, MAXM,
     $                                SCALE3, IWORK, LIWORK,
     $                                SWORK, LDSWORK, INFO)
                        IF( INFO.NE.0 )
     $                     NINFO( 2 ) = NINFO( 2 ) + 1
                        XNRM = SLANGE( 'M', M, N, X, MAXM, DUM )
                        RMUL = ONE
                        IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
                           IF( XNRM.GT.BIGNUM / TNRM ) THEN
                              RMUL = ONE / MAX( XNRM, TNRM )
                           END IF
                        END IF
                        CALL SGEMM( TRANA, 'N', M, N, M, RMUL,
     $                              A, MAXM, X, MAXM, -SCALE3*RMUL,
     $                              CC, MAXM )
                        CALL SGEMM( 'N', TRANB, M, N, N,
     $                              REAL( ISGN )*RMUL, X, MAXM, B,
     $                              MAXN, ONE, CC, MAXM )
                        RES1 = SLANGE( 'M', M, N, CC, MAXM, DUM )
                        RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
     $                             ( ( RMUL*TNRM )*EPS )*XNRM )
 *                       Verify that TRSYL3 only flushes if TRSYL flushes (but
 *                       there may be cases where TRSYL3 avoid flushing).
                        IF( SCALE3.EQ.ZERO .AND. SCALE.GT.ZERO .OR. 
     $                      IINFO.NE.INFO ) THEN
                           NFAIL( 3 ) = NFAIL( 3 ) + 1
                        END IF
                        IF( RES.GT.THRESH .OR. SISNAN( RES ) )
     $                     NFAIL( 2 ) = NFAIL( 2 ) + 1
                        IF( RES.GT.RMAX( 2 ) )
     $                     RMAX( 2 ) = RES
                     END DO
                  END DO
               END DO
            END DO
         END DO
      END DO
 *
      RETURN
 *
 *     End of SSYL01
 *
      END
--- a/lapack-netlib/TESTING/EIG/zchkec.f
+++ b/lapack-netlib/TESTING/EIG/zchkec.f
@@ -88,17 +88,17 @@
 *     .. Local Scalars ..
      LOGICAL            OK
      CHARACTER*3        PATH
      INTEGER            KTREXC, KTRSEN, KTRSNA, KTRSYL, LTREXC, LTRSYL,
     $                   NTESTS, NTREXC, NTRSYL
      DOUBLE PRECISION   EPS, RTREXC, RTRSYL, SFMIN
      INTEGER            KTREXC, KTRSEN, KTRSNA, KTRSYL, KTRSYL3,
     $                   LTREXC, LTRSYL, NTESTS, NTREXC, NTRSYL
      DOUBLE PRECISION   EPS, RTREXC, SFMIN
 *     ..
 *     .. Local Arrays ..
      INTEGER            LTRSEN( 3 ), LTRSNA( 3 ), NTRSEN( 3 ),
     $                   NTRSNA( 3 )
      DOUBLE PRECISION   RTRSEN( 3 ), RTRSNA( 3 )
      INTEGER            FTRSYL( 3 ), ITRSYL( 2 ), LTRSEN( 3 ),
     $                   LTRSNA( 3 ), NTRSEN( 3 ), NTRSNA( 3 )
      DOUBLE PRECISION   RTRSEN( 3 ), RTRSNA( 3 ), RTRSYL( 2 )
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           ZERREC, ZGET35, ZGET36, ZGET37, ZGET38
      EXTERNAL           ZERREC, ZGET35, ZGET36, ZGET37, ZGET38, ZSYL01
 *     ..
 *     .. External Functions ..
      DOUBLE PRECISION   DLAMCH
@@ -120,10 +120,24 @@
     $   CALL ZERREC( PATH, NOUT )
 *
      OK = .TRUE.
      CALL ZGET35( RTRSYL, LTRSYL, NTRSYL, KTRSYL, NIN )
      IF( RTRSYL.GT.THRESH ) THEN
      CALL ZGET35( RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL, NIN )
      IF( RTRSYL( 1 ).GT.THRESH ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9999 )RTRSYL, LTRSYL, NTRSYL, KTRSYL
         WRITE( NOUT, FMT = 9999 )RTRSYL( 1 ), LTRSYL, NTRSYL, KTRSYL
      END IF
 *
      CALL ZSYL01( THRESH, FTRSYL, RTRSYL, ITRSYL, KTRSYL3 )
      IF( FTRSYL( 1 ).GT.0 ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9970 )FTRSYL( 1 ), RTRSYL( 1 ), THRESH
      END IF
      IF( FTRSYL( 2 ).GT.0 ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9971 )FTRSYL( 2 ), RTRSYL( 2 ), THRESH
      END IF
      IF( FTRSYL( 3 ).GT.0 ) THEN
         OK = .FALSE.
         WRITE( NOUT, FMT = 9972 )FTRSYL( 3 )
      END IF
 *
      CALL ZGET36( RTREXC, LTREXC, NTREXC, KTREXC, NIN )
@@ -148,7 +162,7 @@
         WRITE( NOUT, FMT = 9996 )RTRSEN, LTRSEN, NTRSEN, KTRSEN
      END IF
 *
      NTESTS = KTRSYL + KTREXC + KTRSNA + KTRSEN
      NTESTS = KTRSYL + KTRSYL3 + KTREXC + KTRSNA + KTRSEN
      IF( OK )
     $   WRITE( NOUT, FMT = 9995 )PATH, NTESTS
 *
@@ -169,6 +183,12 @@
     $      / ' Safe minimum (SFMIN)             = ', D16.6, / )
 9992 FORMAT( ' Routines pass computational tests if test ratio is ',
     $      'less than', F8.2, / / )
 9970 FORMAT( 'Error in ZTRSYL: ', I8, ' tests fail the threshold.', /
     $      'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
 9971 FORMAT( 'Error in ZTRSYL3: ', I8, ' tests fail the threshold.', /
     $      'Maximum test ratio =', D12.3, ' threshold =', D12.3 )
 9972 FORMAT( 'ZTRSYL and ZTRSYL3 compute an inconsistent scale ',
     $      'factor in ', I8, ' tests.')
      RETURN
 *
 *     End of ZCHKEC
--- a/lapack-netlib/TESTING/EIG/zerrec.f
+++ b/lapack-netlib/TESTING/EIG/zerrec.f
@@ -23,7 +23,7 @@
 *>
 *> ZERREC tests the error exits for the routines for eigen- condition
 *> estimation for DOUBLE PRECISION matrices:
 *>    ZTRSYL, ZTREXC, ZTRSNA and ZTRSEN.
 *>    ZTRSYL, ZTRSYL3, ZTREXC, ZTRSNA and ZTRSEN.
 *> \endverbatim
 *
 *  Arguments:
@@ -77,7 +77,7 @@
 *     ..
 *     .. Local Arrays ..
      LOGICAL            SEL( NMAX )
      DOUBLE PRECISION   RW( LW ), S( NMAX ), SEP( NMAX )
      DOUBLE PRECISION   RW( LW ), S( NMAX ), SEP( NMAX ), SWORK( NMAX )
      COMPLEX*16         A( NMAX, NMAX ), B( NMAX, NMAX ),
     $                   C( NMAX, NMAX ), WORK( LW ), X( NMAX )
 *     ..
@@ -141,6 +141,43 @@
      CALL CHKXER( 'ZTRSYL', INFOT, NOUT, LERR, OK )
      NT = NT + 8
 *
 *     Test ZTRSYL3
 *
      SRNAMT = 'ZTRSYL3'
      INFOT = 1
      CALL ZTRSYL3( 'X', 'N', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 2
      CALL ZTRSYL3( 'N', 'X', 1, 0, 0, A, 1, B, 1, C, 1, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 3
      CALL ZTRSYL3( 'N', 'N', 0, 0, 0, A, 1, B, 1, C, 1, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 4
      CALL ZTRSYL3( 'N', 'N', 1, -1, 0, A, 1, B, 1, C, 1, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 5
      CALL ZTRSYL3( 'N', 'N', 1, 0, -1, A, 1, B, 1, C, 1, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 7
      CALL ZTRSYL3( 'N', 'N', 1, 2, 0, A, 1, B, 1, C, 2, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 9
      CALL ZTRSYL3( 'N', 'N', 1, 0, 2, A, 1, B, 1, C, 1, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
      INFOT = 11
      CALL ZTRSYL3( 'N', 'N', 1, 2, 0, A, 2, B, 1, C, 1, SCALE,
     $              SWORK, NMAX, INFO )
      CALL CHKXER( 'ZTRSYL3', INFOT, NOUT, LERR, OK )
      NT = NT + 8
 *
 *     Test ZTREXC
 *
      SRNAMT = 'ZTREXC'
--- a/lapack-netlib/TESTING/EIG/zsyl01.f
+++ b/lapack-netlib/TESTING/EIG/zsyl01.f
@@ -0,0 +1,294 @@
 *> \brief \b ZSYL01
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
 *
 *     .. Scalar Arguments ..
 *     INTEGER            KNT
 *     DOUBLE PRECISION   THRESH
 *     ..
 *     .. Array Arguments ..
 *     INTEGER            NFAIL( 3 ), NINFO( 2 )
 *     DOUBLE PRECISION   RMAX( 2 )
 *     ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> ZSYL01 tests ZTRSYL and ZTRSYL3, routines for solving the Sylvester matrix
 *> equation
 *>
 *>    op(A)*X + ISGN*X*op(B) = scale*C,
 *>
 *> where op(A) and op(B) are both upper triangular form, op() represents an
 *> optional conjugate transpose, and ISGN can be -1 or +1. Scale is an output
 *> less than or equal to 1, chosen to avoid overflow in X.
 *>
 *> The test code verifies that the following residual does not exceed
 *> the provided threshold:
 *>
 *>    norm(op(A)*X + ISGN*X*op(B) - scale*C) /
 *>        (EPS*max(norm(A),norm(B))*norm(X))
 *>
 *> This routine complements ZGET35 by testing with larger,
 *> random matrices, of which some require rescaling of X to avoid overflow.
 *>
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] THRESH
 *> \verbatim
 *>          THRESH is DOUBLE PRECISION
 *>          A test will count as "failed" if the residual, computed as
 *>          described above, exceeds THRESH.
 *> \endverbatim
 *>
 *> \param[out] NFAIL
 *> \verbatim
 *>          NFAIL is INTEGER array, dimension (3)
 *>          NFAIL(1) = No. of times residual ZTRSYL exceeds threshold THRESH
 *>          NFAIL(2) = No. of times residual ZTRSYL3 exceeds threshold THRESH
 *>          NFAIL(3) = No. of times ZTRSYL3 and ZTRSYL deviate
 *> \endverbatim
 *>
 *> \param[out] RMAX
 *> \verbatim
 *>          RMAX is DOUBLE PRECISION array, dimension (2)
 *>          RMAX(1) = Value of the largest test ratio of ZTRSYL
 *>          RMAX(2) = Value of the largest test ratio of ZTRSYL3
 *> \endverbatim
 *>
 *> \param[out] NINFO
 *> \verbatim
 *>          NINFO is INTEGER array, dimension (2)
 *>          NINFO(1) = No. of times ZTRSYL returns an expected INFO
 *>          NINFO(2) = No. of times ZTRSYL3 returns an expected INFO
 *> \endverbatim
 *>
 *> \param[out] KNT
 *> \verbatim
 *>          KNT is INTEGER
 *>          Total number of examples tested.
 *> \endverbatim
 *
 *  -- LAPACK test routine --
      SUBROUTINE ZSYL01( THRESH, NFAIL, RMAX, NINFO, KNT )
      IMPLICIT NONE
 *
 *  -- LAPACK test routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      INTEGER            KNT
      DOUBLE PRECISION   THRESH
 *     ..
 *     .. Array Arguments ..
      INTEGER            NFAIL( 3 ), NINFO( 2 )
      DOUBLE PRECISION   RMAX( 2 )
 *     ..
 *
 *  =====================================================================
 *     ..
 *     .. Parameters ..
      COMPLEX*16         CONE
      PARAMETER          ( CONE = ( 1.0D0, 0.0D+0 ) )
      DOUBLE PRECISION   ONE, ZERO
      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
      INTEGER            MAXM, MAXN, LDSWORK
      PARAMETER          ( MAXM = 185, MAXN = 192, LDSWORK = 36 )
 *     ..
 *     .. Local Scalars ..
      CHARACTER          TRANA, TRANB
      INTEGER            I, INFO, IINFO, ISGN, ITRANA, ITRANB, J, KLA,
     $                   KUA, KLB, KUB, M, N
      DOUBLE PRECISION   ANRM, BNRM, BIGNUM, EPS, RES, RES1,
     $                   SCALE, SCALE3, SMLNUM, TNRM, XNRM
      COMPLEX*16         RMUL
 *     ..
 *     .. Local Arrays ..
      COMPLEX*16         A( MAXM, MAXM ), B( MAXN, MAXN ),
     $                   C( MAXM, MAXN ), CC( MAXM, MAXN ),
     $                   X( MAXM, MAXN ),
     $                   DUML( MAXM ), DUMR( MAXN ),
     $                   D( MIN( MAXM, MAXN ) )
      DOUBLE PRECISION   SWORK( LDSWORK, 103 ), DUM( MAXN ), VM( 2 )
      INTEGER            ISEED( 4 ), IWORK( MAXM + MAXN + 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            DISNAN
      DOUBLE PRECISION   DLAMCH, ZLANGE
      EXTERNAL           DISNAN, DLAMCH, ZLANGE
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           ZLATMR, ZLACPY, ZGEMM, ZTRSYL, ZTRSYL3
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, DBLE, MAX, SQRT
 *     ..
 *     .. Executable Statements ..
 *
 *     Get machine parameters
 *
      EPS = DLAMCH( 'P' )
      SMLNUM = DLAMCH( 'S' ) / EPS
      BIGNUM = ONE / SMLNUM
 *
 *     Expect INFO = 0
      VM( 1 ) = ONE
 *     Expect INFO = 1
      VM( 2 ) = 0.05D+0
 *
 *     Begin test loop
 *
      NINFO( 1 ) = 0
      NINFO( 2 ) = 0
      NFAIL( 1 ) = 0
      NFAIL( 2 ) = 0
      NFAIL( 3 ) = 0
      RMAX( 1 ) = ZERO
      RMAX( 2 ) = ZERO
      KNT = 0
      ISEED( 1 ) = 1
      ISEED( 2 ) = 1
      ISEED( 3 ) = 1
      ISEED( 4 ) = 1
      SCALE = ONE
      SCALE3 = ONE
      DO J = 1, 2
         DO ISGN = -1, 1, 2
 *           Reset seed (overwritten by LATMR)
            ISEED( 1 ) = 1
            ISEED( 2 ) = 1
            ISEED( 3 ) = 1
            ISEED( 4 ) = 1
            DO M = 32, MAXM, 51
               KLA = 0
               KUA = M - 1
               CALL ZLATMR( M, M, 'S', ISEED, 'N', D,
     $                      6, ONE, CONE, 'T', 'N',
     $                      DUML, 1, ONE, DUMR, 1, ONE,
     $                      'N', IWORK, KLA, KUA, ZERO,
     $                      ONE, 'NO', A, MAXM, IWORK,
     $                      IINFO )
               DO I = 1, M
                  A( I, I ) = A( I, I ) * VM( J )
               END DO
               ANRM = ZLANGE( 'M', M, M, A, MAXM, DUM )
               DO N = 51, MAXN, 47
                  KLB = 0
                  KUB = N - 1
                  CALL ZLATMR( N, N, 'S', ISEED, 'N', D,
     $                         6, ONE, CONE, 'T', 'N',
     $                         DUML, 1, ONE, DUMR, 1, ONE,
     $                         'N', IWORK, KLB, KUB, ZERO,
     $                         ONE, 'NO', B, MAXN, IWORK,
     $                         IINFO )
                  DO I = 1, N
                     B( I, I ) = B( I, I ) * VM ( J )
                  END DO
                  BNRM = ZLANGE( 'M', N, N, B, MAXN, DUM )
                  TNRM = MAX( ANRM, BNRM )
                  CALL ZLATMR( M, N, 'S', ISEED, 'N', D,
     $                         6, ONE, CONE, 'T', 'N',
     $                         DUML, 1, ONE, DUMR, 1, ONE,
     $                         'N', IWORK, M, N, ZERO, ONE,
     $                         'NO', C, MAXM, IWORK, IINFO )
                  DO ITRANA = 1, 2
                     IF( ITRANA.EQ.1 )
     $                   TRANA = 'N'
                     IF( ITRANA.EQ.2 )
     $                   TRANA = 'C'
                     DO ITRANB = 1, 2
                        IF( ITRANB.EQ.1 )
     $                     TRANB = 'N'
                        IF( ITRANB.EQ.2 )
     $                     TRANB = 'C'
                        KNT = KNT + 1
 *
                        CALL ZLACPY( 'All', M, N, C, MAXM, X, MAXM)
                        CALL ZLACPY( 'All', M, N, C, MAXM, CC, MAXM)
                        CALL ZTRSYL( TRANA, TRANB, ISGN, M, N, 
     $                               A, MAXM, B, MAXN, X, MAXM,
     $                               SCALE, IINFO )
                        IF( IINFO.NE.0 )
     $                     NINFO( 1 ) = NINFO( 1 ) + 1
                        XNRM = ZLANGE( 'M', M, N, X, MAXM, DUM )
                        RMUL = CONE
                        IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
                           IF( XNRM.GT.BIGNUM / TNRM ) THEN
                              RMUL = CONE / MAX( XNRM, TNRM )
                           END IF
                        END IF
                        CALL ZGEMM( TRANA, 'N', M, N, M, RMUL,
     $                              A, MAXM, X, MAXM, -SCALE*RMUL,
     $                              CC, MAXM )
                        CALL ZGEMM( 'N', TRANB, M, N, N,
     $                              DBLE( ISGN )*RMUL, X, MAXM, B,
     $                              MAXN, CONE, CC, MAXM )
                        RES1 = ZLANGE( 'M', M, N, CC, MAXM, DUM )
                        RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
     $                        ( ( ABS( RMUL )*TNRM )*EPS )*XNRM )
                        IF( RES.GT.THRESH )
     $                     NFAIL( 1 ) = NFAIL( 1 ) + 1
                        IF( RES.GT.RMAX( 1 ) )
     $                     RMAX( 1 ) = RES
 *
                        CALL ZLACPY( 'All', M, N, C, MAXM, X, MAXM )
                        CALL ZLACPY( 'All', M, N, C, MAXM, CC, MAXM )
                        CALL ZTRSYL3( TRANA, TRANB, ISGN, M, N,
     $                                A, MAXM, B, MAXN, X, MAXM,
     $                                SCALE3, SWORK, LDSWORK, INFO)
                        IF( INFO.NE.0 )
     $                     NINFO( 2 ) = NINFO( 2 ) + 1
                        XNRM = ZLANGE( 'M', M, N, X, MAXM, DUM )
                        RMUL = CONE
                        IF( XNRM.GT.ONE .AND. TNRM.GT.ONE ) THEN
                           IF( XNRM.GT.BIGNUM / TNRM ) THEN
                              RMUL = CONE / MAX( XNRM, TNRM )
                           END IF
                        END IF
                        CALL ZGEMM( TRANA, 'N', M, N, M, RMUL,
     $                              A, MAXM, X, MAXM, -SCALE3*RMUL,
     $                              CC, MAXM )
                        CALL ZGEMM( 'N', TRANB, M, N, N,
     $                              DBLE( ISGN )*RMUL, X, MAXM, B,
     $                              MAXN, CONE, CC, MAXM )
                        RES1 = ZLANGE( 'M', M, N, CC, MAXM, DUM )
                        RES = RES1 / MAX( SMLNUM, SMLNUM*XNRM,
     $                             ( ( ABS( RMUL )*TNRM )*EPS )*XNRM )
 *                       Verify that TRSYL3 only flushes if TRSYL flushes (but
 *                       there may be cases where TRSYL3 avoid flushing).
                        IF( SCALE3.EQ.ZERO .AND. SCALE.GT.ZERO .OR.
     $                      IINFO.NE.INFO ) THEN
                           NFAIL( 3 ) = NFAIL( 3 ) + 1
                        END IF
                        IF( RES.GT.THRESH .OR. DISNAN( RES ) )
     $                     NFAIL( 2 ) = NFAIL( 2 ) + 1
                        IF( RES.GT.RMAX( 2 ) )
     $                     RMAX( 2 ) = RES
                     END DO
                  END DO
               END DO
            END DO
         END DO
      END DO
 *
      RETURN
 *
 *     End of ZSYL01
 *
      END
--- a/lapack-netlib/TESTING/LIN/cchktr.f
+++ b/lapack-netlib/TESTING/LIN/cchktr.f
@@ -31,7 +31,7 @@
 *>
 *> \verbatim
 *>
 *> CCHKTR tests CTRTRI, -TRS, -RFS, and -CON, and CLATRS
 *> CCHKTR tests CTRTRI, -TRS, -RFS, and -CON, and CLATRS(3)
 *> \endverbatim
 *
 *  Arguments:
@@ -184,7 +184,7 @@
      INTEGER            NTYPE1, NTYPES
      PARAMETER          ( NTYPE1 = 10, NTYPES = 18 )
      INTEGER            NTESTS
      PARAMETER          ( NTESTS = 9 )
      PARAMETER          ( NTESTS = 10 )
      INTEGER            NTRAN
      PARAMETER          ( NTRAN = 3 )
      REAL               ONE, ZERO
@@ -195,13 +195,13 @@
      CHARACTER*3        PATH
      INTEGER            I, IDIAG, IMAT, IN, INB, INFO, IRHS, ITRAN,
     $                   IUPLO, K, LDA, N, NB, NERRS, NFAIL, NRHS, NRUN
      REAL               AINVNM, ANORM, DUMMY, RCOND, RCONDC, RCONDI,
     $                   RCONDO, SCALE
      REAL               AINVNM, ANORM, BIGNUM, DUMMY, RCOND, RCONDC,
     $                   RCONDI, RCONDO, RES, SCALE, SLAMCH
 *     ..
 *     .. Local Arrays ..
      CHARACTER          TRANSS( NTRAN ), UPLOS( 2 )
      INTEGER            ISEED( 4 ), ISEEDY( 4 )
      REAL               RESULT( NTESTS )
      REAL               RESULT( NTESTS ), SCALE3( 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
@@ -210,9 +210,9 @@
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           ALAERH, ALAHD, ALASUM, CCOPY, CERRTR, CGET04,
     $                   CLACPY, CLARHS, CLATRS, CLATTR, CTRCON, CTRRFS,
     $                   CTRT01, CTRT02, CTRT03, CTRT05, CTRT06, CTRTRI,
     $                   CTRTRS, XLAENV
     $                   CLACPY, CLARHS, CLATRS, CLATRS3, CLATTR,
     $                   CSSCAL, CTRCON, CTRRFS, CTRT01, CTRT02, CTRT03,
     $                   CTRT05, CTRT06, CTRTRI, CTRTRS, XLAENV, SLAMCH
 *     ..
 *     .. Scalars in Common ..
      LOGICAL            LERR, OK
@@ -236,6 +236,7 @@
 *
      PATH( 1: 1 ) = 'Complex precision'
      PATH( 2: 3 ) = 'TR'
      BIGNUM = SLAMCH('Overflow') / SLAMCH('Precision')
      NRUN = 0
      NFAIL = 0
      NERRS = 0
@@ -380,7 +381,7 @@
 *                       This line is needed on a Sun SPARCstation.
 *
                        IF( N.GT.0 )
     $                     DUMMY = A( 1 )
     $                     DUMMY = REAL( A( 1 ) )
 *
                        CALL CTRT02( UPLO, TRANS, DIAG, N, NRHS, A, LDA,
     $                               X, LDA, B, LDA, WORK, RWORK,
@@ -535,6 +536,32 @@
     $                         RWORK, ONE, B( N+1 ), LDA, X, LDA, WORK,
     $                         RESULT( 9 ) )
 *
 *+    TEST 10
 *                 Solve op(A)*X = B.
 *
                  SRNAMT = 'CLATRS3'
                  CALL CCOPY( N, X, 1, B, 1 )
                  CALL CCOPY( N, X, 1, B, 1 )
                  CALL CSCAL( N, BIGNUM, B( N+1 ), 1 )
                  CALL CLATRS3( UPLO, TRANS, DIAG, 'N', N, 2, A, LDA,
     $                          B, MAX(1, N), SCALE3, RWORK, WORK, NMAX,
     $                          INFO )
 *
 *                 Check error code from CLATRS3.
 *
                  IF( INFO.NE.0 )
     $               CALL ALAERH( PATH, 'CLATRS3', INFO, 0,
     $                            UPLO // TRANS // DIAG // 'Y', N, N,
     $                            -1, -1, -1, IMAT, NFAIL, NERRS, NOUT )
                  CALL CTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
     $                         SCALE3( 1 ), RWORK, ONE, B( 1 ), LDA,
     $                         X, LDA, WORK, RESULT( 10 ) )
                  CALL CSSCAL( N, BIGNUM, X, 1 )
                  CALL CTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
     $                         SCALE3( 2 ), RWORK, ONE, B( N+1 ), LDA,
     $                         X, LDA, WORK, RESULT( 10 ) )
                  RESULT( 10 ) = MAX( RESULT( 10 ), RES )
 *
 *                 Print information about the tests that did not pass
 *                 the threshold.
 *
@@ -552,7 +579,14 @@
     $                  DIAG, 'Y', N, IMAT, 9, RESULT( 9 )
                     NFAIL = NFAIL + 1
                  END IF
                  NRUN = NRUN + 2
                  IF( RESULT( 10 ).GE.THRESH ) THEN
                     IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 )
     $                  CALL ALAHD( NOUT, PATH )
                     WRITE( NOUT, FMT = 9996 )'CLATRS3', UPLO, TRANS,
     $                  DIAG, 'N', N, IMAT, 10, RESULT( 10 )
                     NFAIL = NFAIL + 1
                  END IF
                  NRUN = NRUN + 3
   90          CONTINUE
  100       CONTINUE
  110    CONTINUE
--- a/lapack-netlib/TESTING/LIN/cerrtr.f
+++ b/lapack-netlib/TESTING/LIN/cerrtr.f
@@ -82,9 +82,10 @@
      EXTERNAL           LSAMEN
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           ALAESM, CHKXER, CLATBS, CLATPS, CLATRS, CTBCON,
     $                   CTBRFS, CTBTRS, CTPCON, CTPRFS, CTPTRI, CTPTRS,
     $                   CTRCON, CTRRFS, CTRTI2, CTRTRI, CTRTRS
      EXTERNAL           ALAESM, CHKXER, CLATBS, CLATPS, CLATRS,
     $                   CLATRS3, CTBCON, CTBRFS, CTBTRS, CTPCON,
     $                   CTPRFS, CTPTRI, CTPTRS, CTRCON, CTRRFS, CTRTI2,
     $                   CTRTRI, CTRTRS
 *     ..
 *     .. Scalars in Common ..
      LOGICAL            LERR, OK
@@ -240,6 +241,46 @@
         CALL CLATRS( 'U', 'N', 'N', 'N', 2, A, 1, X, SCALE, RW, INFO )
         CALL CHKXER( 'CLATRS', INFOT, NOUT, LERR, OK )
 *
 *        CLATRS3
 *
         SRNAMT = 'CLATRS3'
         INFOT = 1
         CALL CLATRS3( '/', 'N', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 2
         CALL CLATRS3( 'U', '/', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 3
         CALL CLATRS3( 'U', 'N', '/', 'N', 0, 0, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 4
         CALL CLATRS3( 'U', 'N', 'N', '/', 0, 0, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 5
         CALL CLATRS3( 'U', 'N', 'N', 'N', -1, 0, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 6
         CALL CLATRS3( 'U', 'N', 'N', 'N', 0, -1, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 8
         CALL CLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 10
         CALL CLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 2, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 14
         CALL CLATRS3( 'U', 'N', 'N', 'N', 1, 0, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 0, INFO )
         CALL CHKXER( 'CLATRS3', INFOT, NOUT, LERR, OK )
 *
 *     Test error exits for the packed triangular routines.
 *
      ELSE IF( LSAMEN( 2, C2, 'TP' ) ) THEN
--- a/lapack-netlib/TESTING/LIN/dchktr.f
+++ b/lapack-netlib/TESTING/LIN/dchktr.f
@@ -30,7 +30,7 @@
 *>
 *> \verbatim
 *>
 *> DCHKTR tests DTRTRI, -TRS, -RFS, and -CON, and DLATRS
 *> DCHKTR tests DTRTRI, -TRS, -RFS, and -CON, and DLATRS(3)
 *> \endverbatim
 *
 *  Arguments:
@@ -187,7 +187,7 @@
      INTEGER            NTYPE1, NTYPES
      PARAMETER          ( NTYPE1 = 10, NTYPES = 18 )
      INTEGER            NTESTS
      PARAMETER          ( NTESTS = 9 )
      PARAMETER          ( NTESTS = 10 )
      INTEGER            NTRAN
      PARAMETER          ( NTRAN = 3 )
      DOUBLE PRECISION   ONE, ZERO
@@ -198,13 +198,13 @@
      CHARACTER*3        PATH
      INTEGER            I, IDIAG, IMAT, IN, INB, INFO, IRHS, ITRAN,
     $                   IUPLO, K, LDA, N, NB, NERRS, NFAIL, NRHS, NRUN
      DOUBLE PRECISION   AINVNM, ANORM, DUMMY, RCOND, RCONDC, RCONDI,
     $                   RCONDO, SCALE
      DOUBLE PRECISION   AINVNM, ANORM, BIGNUM, DLAMCH, DUMMY, RCOND,
     $                   RCONDC, RCONDI, RCONDO, RES, SCALE
 *     ..
 *     .. Local Arrays ..
      CHARACTER          TRANSS( NTRAN ), UPLOS( 2 )
      INTEGER            ISEED( 4 ), ISEEDY( 4 )
      DOUBLE PRECISION   RESULT( NTESTS )
      DOUBLE PRECISION   RESULT( NTESTS ), SCALE3( 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
@@ -213,9 +213,9 @@
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           ALAERH, ALAHD, ALASUM, DCOPY, DERRTR, DGET04,
     $                   DLACPY, DLARHS, DLATRS, DLATTR, DTRCON, DTRRFS,
     $                   DTRT01, DTRT02, DTRT03, DTRT05, DTRT06, DTRTRI,
     $                   DTRTRS, XLAENV
     $                   DLACPY, DLAMCH, DSCAL, DLARHS, DLATRS, DLATRS3,
     $                   DLATTR, DTRCON, DTRRFS, DTRT01, DTRT02, DTRT03,
     $                   DTRT05, DTRT06, DTRTRI, DTRTRS, XLAENV
 *     ..
 *     .. Scalars in Common ..
      LOGICAL            LERR, OK
@@ -239,6 +239,7 @@
 *
      PATH( 1: 1 ) = 'Double precision'
      PATH( 2: 3 ) = 'TR'
      BIGNUM = DLAMCH('Overflow') / DLAMCH('Precision')
      NRUN = 0
      NFAIL = 0
      NERRS = 0
@@ -539,6 +540,32 @@
     $                         RWORK, ONE, B( N+1 ), LDA, X, LDA, WORK,
     $                         RESULT( 9 ) )
 *
 *+    TEST 10
 *                 Solve op(A)*X = B
 *
                  SRNAMT = 'DLATRS3'
                  CALL DCOPY( N, X, 1, B, 1 )
                  CALL DCOPY( N, X, 1, B( N+1 ), 1 )
                  CALL DSCAL( N, BIGNUM, B( N+1 ), 1 )
                  CALL DLATRS3( UPLO, TRANS, DIAG, 'N', N, 2, A, LDA,
     $                          B, MAX(1, N), SCALE3, RWORK, WORK, NMAX,
     $                          INFO )
 *
 *                 Check error code from DLATRS3.
 *
                  IF( INFO.NE.0 )
     $               CALL ALAERH( PATH, 'DLATRS3', INFO, 0,
     $                            UPLO // TRANS // DIAG // 'N', N, N,
     $                            -1, -1, -1, IMAT, NFAIL, NERRS, NOUT )
                  CALL DTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
     $                         SCALE3( 1 ), RWORK, ONE, B( 1 ), LDA,
     $                         X, LDA, WORK, RESULT( 10 ) )
                  CALL DSCAL( N, BIGNUM, X, 1 )
                  CALL DTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
     $                         SCALE3( 2 ), RWORK, ONE, B( N+1 ), LDA,
     $                         X, LDA, WORK, RES )
                  RESULT( 10 ) = MAX( RESULT( 10 ), RES )
 *
 *                 Print information about the tests that did not pass
 *                 the threshold.
 *
@@ -556,7 +583,14 @@
     $                  DIAG, 'Y', N, IMAT, 9, RESULT( 9 )
                     NFAIL = NFAIL + 1
                  END IF
                  NRUN = NRUN + 2
                  IF( RESULT( 10 ).GE.THRESH ) THEN
                     IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 )
     $                  CALL ALAHD( NOUT, PATH )
                     WRITE( NOUT, FMT = 9996 )'DLATRS3', UPLO, TRANS,
     $                  DIAG, 'N', N, IMAT, 10, RESULT( 10 )
                     NFAIL = NFAIL + 1
                  END IF
                  NRUN = NRUN + 3
   90          CONTINUE
  100       CONTINUE
  110    CONTINUE
@@ -569,8 +603,8 @@
 9999 FORMAT( ' UPLO=''', A1, ''', DIAG=''', A1, ''', N=', I5, ', NB=',
     $      I4, ', type ', I2, ', test(', I2, ')= ', G12.5 )
 9998 FORMAT( ' UPLO=''', A1, ''', TRANS=''', A1, ''', DIAG=''', A1,
     $      ''', N=', I5, ', NB=', I4, ', type ', I2, ',
     $      test(', I2, ')= ', G12.5 )
     $      ''', N=', I5, ', NB=', I4, ', type ', I2, ', test(',
     $      I2, ')= ', G12.5 )
 9997 FORMAT( ' NORM=''', A1, ''', UPLO =''', A1, ''', N=', I5, ',',
     $      11X, ' type ', I2, ', test(', I2, ')=', G12.5 )
 9996 FORMAT( 1X, A, '( ''', A1, ''', ''', A1, ''', ''', A1, ''', ''',
--- a/lapack-netlib/TESTING/LIN/derrtr.f
+++ b/lapack-netlib/TESTING/LIN/derrtr.f
@@ -83,9 +83,10 @@
      EXTERNAL           LSAMEN
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           ALAESM, CHKXER, DLATBS, DLATPS, DLATRS, DTBCON,
     $                   DTBRFS, DTBTRS, DTPCON, DTPRFS, DTPTRI, DTPTRS,
     $                   DTRCON, DTRRFS, DTRTI2, DTRTRI, DTRTRS
      EXTERNAL           ALAESM, CHKXER, DLATBS, DLATPS, DLATRS,
     $                   DLATRS3, DTBCON, DTBRFS, DTBTRS, DTPCON,
     $                   DTPRFS, DTPTRI, DTPTRS, DTRCON, DTRRFS,
     $                   DTRTI2, DTRTRI, DTRTRS
 *     ..
 *     .. Scalars in Common ..
      LOGICAL            LERR, OK
@@ -244,6 +245,46 @@
         INFOT = 7
         CALL DLATRS( 'U', 'N', 'N', 'N', 2, A, 1, X, SCALE, W, INFO )
         CALL CHKXER( 'DLATRS', INFOT, NOUT, LERR, OK )
 *
 *        DLATRS3
 *
         SRNAMT = 'DLATRS3'
         INFOT = 1
         CALL DLATRS3( '/', 'N', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 2
         CALL DLATRS3( 'U', '/', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 3
         CALL DLATRS3( 'U', 'N', '/', 'N', 0, 0, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 4
         CALL DLATRS3( 'U', 'N', 'N', '/', 0, 0, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 5
         CALL DLATRS3( 'U', 'N', 'N', 'N', -1, 0, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 6
         CALL DLATRS3( 'U', 'N', 'N', 'N', 0, -1, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 8
         CALL DLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 10
         CALL DLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 2, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 14
         CALL DLATRS3( 'U', 'N', 'N', 'N', 1, 0, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 0, INFO )
         CALL CHKXER( 'DLATRS3', INFOT, NOUT, LERR, OK )
 *
      ELSE IF( LSAMEN( 2, C2, 'TP' ) ) THEN
 *
--- a/lapack-netlib/TESTING/LIN/schktr.f
+++ b/lapack-netlib/TESTING/LIN/schktr.f
@@ -30,7 +30,7 @@
 *>
 *> \verbatim
 *>
 *> SCHKTR tests STRTRI, -TRS, -RFS, and -CON, and SLATRS
 *> SCHKTR tests STRTRI, -TRS, -RFS, and -CON, and SLATRS(3)
 *> \endverbatim
 *
 *  Arguments:
@@ -187,7 +187,7 @@
      INTEGER            NTYPE1, NTYPES
      PARAMETER          ( NTYPE1 = 10, NTYPES = 18 )
      INTEGER            NTESTS
      PARAMETER          ( NTESTS = 9 )
      PARAMETER          ( NTESTS = 10 )
      INTEGER            NTRAN
      PARAMETER          ( NTRAN = 3 )
      REAL               ONE, ZERO
@@ -198,13 +198,13 @@
      CHARACTER*3        PATH
      INTEGER            I, IDIAG, IMAT, IN, INB, INFO, IRHS, ITRAN,
     $                   IUPLO, K, LDA, N, NB, NERRS, NFAIL, NRHS, NRUN
      REAL               AINVNM, ANORM, DUMMY, RCOND, RCONDC, RCONDI,
     $                   RCONDO, SCALE
      REAL               AINVNM, ANORM, BIGNUM, DUMMY, RCOND, RCONDC,
     $                   RCONDI, RCONDO, RES, SCALE, SLAMCH
 *     ..
 *     .. Local Arrays ..
      CHARACTER          TRANSS( NTRAN ), UPLOS( 2 )
      INTEGER            ISEED( 4 ), ISEEDY( 4 )
      REAL               RESULT( NTESTS )
      REAL               RESULT( NTESTS ), SCALE3( 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
@@ -213,9 +213,9 @@
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           ALAERH, ALAHD, ALASUM, SCOPY, SERRTR, SGET04,
     $                   SLACPY, SLARHS, SLATRS, SLATTR, STRCON, STRRFS,
     $                   STRT01, STRT02, STRT03, STRT05, STRT06, STRTRI,
     $                   STRTRS, XLAENV
     $                   SLACPY, SLARHS, SLATRS, SLATRS3, SLATTR, SSCAL,
     $                   STRCON, STRRFS, STRT01, STRT02, STRT03, STRT05,
     $                   STRT06, STRTRI, STRTRS, XLAENV, SLAMCH
 *     ..
 *     .. Scalars in Common ..
      LOGICAL            LERR, OK
@@ -239,6 +239,7 @@
 *
      PATH( 1: 1 ) = 'Single precision'
      PATH( 2: 3 ) = 'TR'
      BIGNUM = SLAMCH('Overflow') / SLAMCH('Precision')
      NRUN = 0
      NFAIL = 0
      NERRS = 0
@@ -539,6 +540,33 @@
     $                         RWORK, ONE, B( N+1 ), LDA, X, LDA, WORK,
     $                         RESULT( 9 ) )
 *
 *+    TEST 10
 *                 Solve op(A)*X = B
 *
                  SRNAMT = 'SLATRS3'
                  CALL SCOPY( N, X, 1, B, 1 )
                  CALL SCOPY( N, X, 1, B( N+1 ), 1 )
                  CALL SSCAL( N, BIGNUM, B( N+1 ), 1 )
                  CALL SLATRS3( UPLO, TRANS, DIAG, 'N', N, 2, A, LDA,
     $                          B, MAX(1, N), SCALE3, RWORK, WORK, NMAX,
     $                          INFO )
 *
 *                 Check error code from SLATRS3.
 *
                  IF( INFO.NE.0 )
     $               CALL ALAERH( PATH, 'SLATRS3', INFO, 0,
     $                            UPLO // TRANS // DIAG // 'Y', N, N,
     $                            -1, -1, -1, IMAT, NFAIL, NERRS, NOUT )
 *
                  CALL STRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
     $                         SCALE3 ( 1 ), RWORK, ONE, B( N+1 ), LDA,
     $                         X, LDA, WORK, RESULT( 10 ) )
                  CALL SSCAL( N, BIGNUM, X, 1 )
                  CALL STRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
     $                         SCALE3( 2 ), RWORK, ONE, B( N+1 ), LDA,
     $                         X, LDA, WORK, RES )
                  RESULT( 10 ) = MAX( RESULT( 10 ), RES )
 *
 *                 Print information about the tests that did not pass
 *                 the threshold.
 *
@@ -556,7 +584,14 @@
     $                  DIAG, 'Y', N, IMAT, 9, RESULT( 9 )
                     NFAIL = NFAIL + 1
                  END IF
                  NRUN = NRUN + 2
                  IF( RESULT( 10 ).GE.THRESH ) THEN
                     IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 )
     $                  CALL ALAHD( NOUT, PATH )
                     WRITE( NOUT, FMT = 9996 )'SLATRS3', UPLO, TRANS,
     $                  DIAG, 'N', N, IMAT, 10, RESULT( 10 )
                     NFAIL = NFAIL + 1
                  END IF
                  NRUN = NRUN + 3
   90          CONTINUE
  100       CONTINUE
  110    CONTINUE
@@ -569,8 +604,8 @@
 9999 FORMAT( ' UPLO=''', A1, ''', DIAG=''', A1, ''', N=', I5, ', NB=',
     $      I4, ', type ', I2, ', test(', I2, ')= ', G12.5 )
 9998 FORMAT( ' UPLO=''', A1, ''', TRANS=''', A1, ''', DIAG=''', A1,
     $      ''', N=', I5, ', NB=', I4, ', type ', I2, ',
     $      test(', I2, ')= ', G12.5 )
     $      ''', N=', I5, ', NB=', I4, ', type ', I2, ', test(',
     $      I2, ')= ', G12.5 )
 9997 FORMAT( ' NORM=''', A1, ''', UPLO =''', A1, ''', N=', I5, ',',
     $      11X, ' type ', I2, ', test(', I2, ')=', G12.5 )
 9996 FORMAT( 1X, A, '( ''', A1, ''', ''', A1, ''', ''', A1, ''', ''',
--- a/lapack-netlib/TESTING/LIN/serrtr.f
+++ b/lapack-netlib/TESTING/LIN/serrtr.f
@@ -83,9 +83,10 @@
      EXTERNAL           LSAMEN
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           ALAESM, CHKXER, SLATBS, SLATPS, SLATRS, STBCON,
     $                   STBRFS, STBTRS, STPCON, STPRFS, STPTRI, STPTRS,
     $                   STRCON, STRRFS, STRTI2, STRTRI, STRTRS
      EXTERNAL           ALAESM, CHKXER, SLATBS, SLATPS, SLATRS,
     $                   SLATRS3, STBCON, STBRFS, STBTRS, STPCON,
     $                   STPRFS, STPTRI, STPTRS, STRCON, STRRFS, STRTI2,
     $                   STRTRI, STRTRS
 *     ..
 *     .. Scalars in Common ..
      LOGICAL            LERR, OK
@@ -244,6 +245,46 @@
         INFOT = 7
         CALL SLATRS( 'U', 'N', 'N', 'N', 2, A, 1, X, SCALE, W, INFO )
         CALL CHKXER( 'SLATRS', INFOT, NOUT, LERR, OK )
 *
 *        SLATRS3
 *
         SRNAMT = 'SLATRS3'
         INFOT = 1
         CALL SLATRS3( '/', 'N', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 2
         CALL SLATRS3( 'U', '/', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 3
         CALL SLATRS3( 'U', 'N', '/', 'N', 0, 0, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 4
         CALL SLATRS3( 'U', 'N', 'N', '/', 0, 0, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 5
         CALL SLATRS3( 'U', 'N', 'N', 'N', -1, 0, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 6
         CALL SLATRS3( 'U', 'N', 'N', 'N', 0, -1, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 8
         CALL SLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 10
         CALL SLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 2, X, 1, SCALE, W,
     $                 W( 2 ), 1, INFO )
         CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 14
         CALL SLATRS3( 'U', 'N', 'N', 'N', 1, 0, A, 1, X, 1, SCALE, W,
     $                 W( 2 ), 0, INFO )
         CALL CHKXER( 'SLATRS3', INFOT, NOUT, LERR, OK )
 *
      ELSE IF( LSAMEN( 2, C2, 'TP' ) ) THEN
 *
--- a/lapack-netlib/TESTING/LIN/zchktr.f
+++ b/lapack-netlib/TESTING/LIN/zchktr.f
@@ -31,7 +31,7 @@
 *>
 *> \verbatim
 *>
 *> ZCHKTR tests ZTRTRI, -TRS, -RFS, and -CON, and ZLATRS
 *> ZCHKTR tests ZTRTRI, -TRS, -RFS, and -CON, and ZLATRS(3)
 *> \endverbatim
 *
 *  Arguments:
@@ -184,7 +184,7 @@
      INTEGER            NTYPE1, NTYPES
      PARAMETER          ( NTYPE1 = 10, NTYPES = 18 )
      INTEGER            NTESTS
      PARAMETER          ( NTESTS = 9 )
      PARAMETER          ( NTESTS = 10 )
      INTEGER            NTRAN
      PARAMETER          ( NTRAN = 3 )
      DOUBLE PRECISION   ONE, ZERO
@@ -195,13 +195,13 @@
      CHARACTER*3        PATH
      INTEGER            I, IDIAG, IMAT, IN, INB, INFO, IRHS, ITRAN,
     $                   IUPLO, K, LDA, N, NB, NERRS, NFAIL, NRHS, NRUN
      DOUBLE PRECISION   AINVNM, ANORM, DUMMY, RCOND, RCONDC, RCONDI,
     $                   RCONDO, SCALE
      DOUBLE PRECISION   AINVNM, ANORM, BIGNUM, DUMMY, RCOND, RCONDC,
     $                   RCONDI, RCONDO, RES, SCALE, DLAMCH
 *     ..
 *     .. Local Arrays ..
      CHARACTER          TRANSS( NTRAN ), UPLOS( 2 )
      INTEGER            ISEED( 4 ), ISEEDY( 4 )
      DOUBLE PRECISION   RESULT( NTESTS )
      DOUBLE PRECISION   RESULT( NTESTS ), SCALE3( 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
@@ -209,10 +209,10 @@
      EXTERNAL           LSAME, ZLANTR
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           ALAERH, ALAHD, ALASUM, XLAENV, ZCOPY, ZERRTR,
     $                   ZGET04, ZLACPY, ZLARHS, ZLATRS, ZLATTR, ZTRCON,
     $                   ZTRRFS, ZTRT01, ZTRT02, ZTRT03, ZTRT05, ZTRT06,
     $                   ZTRTRI, ZTRTRS
      EXTERNAL           ALAERH, ALAHD, ALASUM, DLAMCH, XLAENV, ZCOPY,
     $                   ZDSCAL, ZERRTR, ZGET04, ZLACPY, ZLARHS, ZLATRS,
     $                   ZLATRS3, ZLATTR, ZTRCON, ZTRRFS, ZTRT01,
     $                   ZTRT02, ZTRT03, ZTRT05, ZTRT06, ZTRTRI, ZTRTRS
 *     ..
 *     .. Scalars in Common ..
      LOGICAL            LERR, OK
@@ -236,6 +236,7 @@
 *
      PATH( 1: 1 ) = 'Zomplex precision'
      PATH( 2: 3 ) = 'TR'
      BIGNUM = DLAMCH('Overflow') / DLAMCH('Precision')
      NRUN = 0
      NFAIL = 0
      NERRS = 0
@@ -380,7 +381,7 @@
 *                       This line is needed on a Sun SPARCstation.
 *
                        IF( N.GT.0 )
     $                     DUMMY = A( 1 )
     $                     DUMMY = DBLE( A( 1 ) )
 *
                        CALL ZTRT02( UPLO, TRANS, DIAG, N, NRHS, A, LDA,
     $                               X, LDA, B, LDA, WORK, RWORK,
@@ -535,6 +536,32 @@
     $                         RWORK, ONE, B( N+1 ), LDA, X, LDA, WORK,
     $                         RESULT( 9 ) )
 *
 *+    TEST 10
 *                 Solve op(A)*X = B
 *
                  SRNAMT = 'ZLATRS3'
                  CALL ZCOPY( N, X, 1, B, 1 )
                  CALL ZCOPY( N, X, 1, B( N+1 ), 1 )
                  CALL ZDSCAL( N, BIGNUM, B( N+1 ), 1 )
                  CALL ZLATRS3( UPLO, TRANS, DIAG, 'N', N, 2, A, LDA,
     $                          B, MAX(1, N), SCALE3, RWORK, WORK, NMAX,
     $                          INFO )
 *
 *                 Check error code from ZLATRS3.
 *
                  IF( INFO.NE.0 )
     $               CALL ALAERH( PATH, 'ZLATRS3', INFO, 0,
     $                            UPLO // TRANS // DIAG // 'N', N, N,
     $                            -1, -1, -1, IMAT, NFAIL, NERRS, NOUT )
                  CALL ZTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
     $                         SCALE3( 1 ), RWORK, ONE, B( 1 ), LDA,
     $                         X, LDA, WORK, RESULT( 10 ) )
                  CALL ZDSCAL( N, BIGNUM, X, 1 )
                  CALL ZTRT03( UPLO, TRANS, DIAG, N, 1, A, LDA,
     $                         SCALE3( 2 ), RWORK, ONE, B( N+1 ), LDA,
     $                         X, LDA, WORK, RES )
                  RESULT( 10 ) = MAX( RESULT( 10 ), RES )
 *
 *                 Print information about the tests that did not pass
 *                 the threshold.
 *
@@ -552,7 +579,14 @@
     $                  DIAG, 'Y', N, IMAT, 9, RESULT( 9 )
                     NFAIL = NFAIL + 1
                  END IF
                  NRUN = NRUN + 2
                  IF( RESULT( 10 ).GE.THRESH ) THEN
                     IF( NFAIL.EQ.0 .AND. NERRS.EQ.0 )
     $                  CALL ALAHD( NOUT, PATH )
                     WRITE( NOUT, FMT = 9996 )'ZLATRS3', UPLO, TRANS,
     $                  DIAG, 'N', N, IMAT, 10, RESULT( 10 )
                     NFAIL = NFAIL + 1
                  END IF
                  NRUN = NRUN + 3
   90          CONTINUE
  100       CONTINUE
  110    CONTINUE
@@ -565,8 +599,8 @@
 9999 FORMAT( ' UPLO=''', A1, ''', DIAG=''', A1, ''', N=', I5, ', NB=',
     $      I4, ', type ', I2, ', test(', I2, ')= ', G12.5 )
 9998 FORMAT( ' UPLO=''', A1, ''', TRANS=''', A1, ''', DIAG=''', A1,
     $      ''', N=', I5, ', NB=', I4, ', type ', I2, ',
     $      test(', I2, ')= ', G12.5 )
     $      ''', N=', I5, ', NB=', I4, ', type ', I2, ', test(',
     $      I2, ')= ', G12.5 )
 9997 FORMAT( ' NORM=''', A1, ''', UPLO =''', A1, ''', N=', I5, ',',
     $      11X, ' type ', I2, ', test(', I2, ')=', G12.5 )
 9996 FORMAT( 1X, A, '( ''', A1, ''', ''', A1, ''', ''', A1, ''', ''',
--- a/lapack-netlib/TESTING/LIN/zerrtr.f
+++ b/lapack-netlib/TESTING/LIN/zerrtr.f
@@ -82,9 +82,10 @@
      EXTERNAL           LSAMEN
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           ALAESM, CHKXER, ZLATBS, ZLATPS, ZLATRS, ZTBCON,
     $                   ZTBRFS, ZTBTRS, ZTPCON, ZTPRFS, ZTPTRI, ZTPTRS,
     $                   ZTRCON, ZTRRFS, ZTRTI2, ZTRTRI, ZTRTRS
      EXTERNAL           ALAESM, CHKXER, ZLATBS, ZLATPS, ZLATRS,
     $                   ZLATRS3, ZTBCON, ZTBRFS, ZTBTRS, ZTPCON,
     $                   ZTPRFS, ZTPTRI, ZTPTRS, ZTRCON, ZTRRFS, ZTRTI2,
     $                   ZTRTRI, ZTRTRS
 *     ..
 *     .. Scalars in Common ..
      LOGICAL            LERR, OK
@@ -240,6 +241,46 @@
         CALL ZLATRS( 'U', 'N', 'N', 'N', 2, A, 1, X, SCALE, RW, INFO )
         CALL CHKXER( 'ZLATRS', INFOT, NOUT, LERR, OK )
 *
 *        ZLATRS3
 *
         SRNAMT = 'ZLATRS3'
         INFOT = 1
         CALL ZLATRS3( '/', 'N', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 2
         CALL ZLATRS3( 'U', '/', 'N', 'N', 0, 0, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 3
         CALL ZLATRS3( 'U', 'N', '/', 'N', 0, 0, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 4
         CALL ZLATRS3( 'U', 'N', 'N', '/', 0, 0, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 5
         CALL ZLATRS3( 'U', 'N', 'N', 'N', -1, 0, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 6
         CALL ZLATRS3( 'U', 'N', 'N', 'N', 0, -1, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 8
         CALL ZLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 10
         CALL ZLATRS3( 'U', 'N', 'N', 'N', 2, 0, A, 2, X, 1, SCALE, RW,
     $                 RW( 2 ), 1, INFO )
         CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
         INFOT = 14
         CALL ZLATRS3( 'U', 'N', 'N', 'N', 1, 0, A, 1, X, 1, SCALE, RW,
     $                 RW( 2 ), 0, INFO )
         CALL CHKXER( 'ZLATRS3', INFOT, NOUT, LERR, OK )
 *
 *     Test error exits for the packed triangular routines.
 *
      ELSE IF( LSAMEN( 2, C2, 'TP' ) ) THEN