Browse Source

Merge pull request #797 from wernsaar/develop

bugfixes for lapack and lapacke
tags/v0.2.16^2
wernsaar 10 years ago
parent
commit
711ecb8bd5
24 changed files with 120 additions and 62 deletions
  1. +3
    -3
      kernel/x86_64/KERNEL
  2. +0
    -3
      kernel/x86_64/KERNEL.BARCELONA
  3. +1
    -1
      kernel/x86_64/KERNEL.BULLDOZER
  4. +2
    -4
      lapack-netlib/LAPACKE/src/lapacke_clantr.c
  5. +2
    -4
      lapack-netlib/LAPACKE/src/lapacke_dlantr.c
  6. +2
    -2
      lapack-netlib/LAPACKE/src/lapacke_dlantr_work.c
  7. +3
    -8
      lapack-netlib/LAPACKE/src/lapacke_dormbr_work.c
  8. +1
    -6
      lapack-netlib/LAPACKE/src/lapacke_dormlq_work.c
  9. +2
    -4
      lapack-netlib/LAPACKE/src/lapacke_slantr.c
  10. +1
    -1
      lapack-netlib/LAPACKE/src/lapacke_slantr_work.c
  11. +5
    -2
      lapack-netlib/LAPACKE/src/lapacke_sormbr_work.c
  12. +5
    -1
      lapack-netlib/LAPACKE/src/lapacke_sormlq_work.c
  13. +2
    -4
      lapack-netlib/LAPACKE/src/lapacke_zlantr.c
  14. +1
    -1
      lapack-netlib/LAPACKE/src/lapacke_zlantr_work.c
  15. +2
    -2
      lapack-netlib/SRC/cgeev.f
  16. +19
    -1
      lapack-netlib/SRC/cgetc2.f
  17. +4
    -4
      lapack-netlib/SRC/cggev3.f
  18. +2
    -2
      lapack-netlib/SRC/dgeev.f
  19. +19
    -1
      lapack-netlib/SRC/dgetc2.f
  20. +2
    -2
      lapack-netlib/SRC/sgeev.f
  21. +19
    -1
      lapack-netlib/SRC/sgetc2.f
  22. +2
    -2
      lapack-netlib/SRC/zgeev.f
  23. +19
    -1
      lapack-netlib/SRC/zgetc2.f
  24. +2
    -2
      lapack-netlib/SRC/zggev3.f

+ 3
- 3
kernel/x86_64/KERNEL View File

@@ -389,7 +389,7 @@ DGEMVTKERNEL = dgemv_t.S
endif

ifndef CGEMVNKERNEL
CGEMVNKERNEL = cgemv_n.S
CGEMVNKERNEL = cgemv_n_4.c
endif

ifndef CGEMVTKERNEL
@@ -397,11 +397,11 @@ CGEMVTKERNEL = cgemv_t_4.c
endif

ifndef ZGEMVNKERNEL
ZGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n_4.c
endif

ifndef ZGEMVTKERNEL
ZGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t_4.c
endif

ifndef QGEMVNKERNEL


+ 0
- 3
kernel/x86_64/KERNEL.BARCELONA View File

@@ -1,6 +1,3 @@
ZGEMVNKERNEL = zgemv_n_dup.S
ZGEMVTKERNEL = zgemv_t.S

SGEMMKERNEL = gemm_kernel_8x4_barcelona.S
SGEMMINCOPY = ../generic/gemm_ncopy_8.c
SGEMMITCOPY = ../generic/gemm_tcopy_8.c


+ 1
- 1
kernel/x86_64/KERNEL.BULLDOZER View File

@@ -18,7 +18,7 @@ SSYMV_L_KERNEL = ssymv_L.c
SGEMVNKERNEL = sgemv_n_4.c
SGEMVTKERNEL = sgemv_t_4.c

ZGEMVNKERNEL = zgemv_n_dup.S
ZGEMVNKERNEL = zgemv_n_4.c
ZGEMVTKERNEL = zgemv_t_4.c

DGEMVNKERNEL = dgemv_n_bulldozer.S


+ 2
- 4
lapack-netlib/LAPACKE/src/lapacke_clantr.c View File

@@ -51,8 +51,7 @@ float LAPACKE_clantr( int matrix_layout, char norm, char uplo, char diag,
}
#endif
/* Allocate memory for working array(s) */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
work = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,MAX(m,n)) );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
@@ -63,8 +62,7 @@ float LAPACKE_clantr( int matrix_layout, char norm, char uplo, char diag,
res = LAPACKE_clantr_work( matrix_layout, norm, uplo, diag, m, n, a, lda,
work );
/* Release memory and exit */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
LAPACKE_free( work );
}
exit_level_0:


+ 2
- 4
lapack-netlib/LAPACKE/src/lapacke_dlantr.c View File

@@ -51,8 +51,7 @@ double LAPACKE_dlantr( int matrix_layout, char norm, char uplo, char diag,
}
#endif
/* Allocate memory for working array(s) */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
work = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,MAX(m,n)) );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
@@ -63,8 +62,7 @@ double LAPACKE_dlantr( int matrix_layout, char norm, char uplo, char diag,
res = LAPACKE_dlantr_work( matrix_layout, norm, uplo, diag, m, n, a, lda,
work );
/* Release memory and exit */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
LAPACKE_free( work );
}
exit_level_0:


+ 2
- 2
lapack-netlib/LAPACKE/src/lapacke_dlantr_work.c View File

@@ -38,10 +38,10 @@ double LAPACKE_dlantr_work( int matrix_layout, char norm, char uplo,
const double* a, lapack_int lda, double* work )
{
lapack_int info = 0;
double res = 0.;
double res = 0.;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_dlantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
res = LAPACK_dlantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
if( info < 0 ) {
info = info - 1;
}


+ 3
- 8
lapack-netlib/LAPACKE/src/lapacke_dormbr_work.c View File

@@ -74,11 +74,10 @@ lapack_int LAPACKE_dormbr_work( int matrix_layout, char vect, char side,
}
/* Allocate memory for temporary array(s) */
if( LAPACKE_lsame( vect, 'q' ) ) {
a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * k );
a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,k) );
} else {
a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * nq );
a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,nq) );
}
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
@@ -89,11 +88,7 @@ lapack_int LAPACKE_dormbr_work( int matrix_layout, char vect, char side,
goto exit_level_1;
}
/* Transpose input matrices */
if( LAPACKE_lsame( vect, 'q' ) ) {
LAPACKE_dge_trans( matrix_layout, nq, k, a, lda, a_t, lda_t );
} else {
LAPACKE_dge_trans( matrix_layout, k, nq, a, lda, a_t, lda_t );
}
LAPACKE_dge_trans( matrix_layout, r, MIN(nq,k), a, lda, a_t, lda_t );
LAPACKE_dge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
/* Call LAPACK function and adjust info */
LAPACK_dormbr( &vect, &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t,


+ 1
- 6
lapack-netlib/LAPACKE/src/lapacke_dormlq_work.c View File

@@ -87,12 +87,7 @@ lapack_int LAPACKE_dormlq_work( int matrix_layout, char side, char trans,
goto exit_level_1;
}
/* Transpose input matrices */
if( LAPACKE_lsame( side, 'l' ) ){
LAPACKE_dge_trans( matrix_layout, k, m, a, lda, a_t, lda_t );
} else {
LAPACKE_dge_trans( matrix_layout, k, n, a, lda, a_t, lda_t );
}

LAPACKE_dge_trans( matrix_layout, k, m, a, lda, a_t, lda_t );
LAPACKE_dge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
/* Call LAPACK function and adjust info */
LAPACK_dormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t,


+ 2
- 4
lapack-netlib/LAPACKE/src/lapacke_slantr.c View File

@@ -51,8 +51,7 @@ float LAPACKE_slantr( int matrix_layout, char norm, char uplo, char diag,
}
#endif
/* Allocate memory for working array(s) */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
work = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,MAX(m,n)) );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
@@ -63,8 +62,7 @@ float LAPACKE_slantr( int matrix_layout, char norm, char uplo, char diag,
res = LAPACKE_slantr_work( matrix_layout, norm, uplo, diag, m, n, a, lda,
work );
/* Release memory and exit */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
LAPACKE_free( work );
}
exit_level_0:


+ 1
- 1
lapack-netlib/LAPACKE/src/lapacke_slantr_work.c View File

@@ -41,7 +41,7 @@ float LAPACKE_slantr_work( int matrix_layout, char norm, char uplo,
float res = 0.;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_slantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
res = LAPACK_slantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
if( info < 0 ) {
info = info - 1;
}


+ 5
- 2
lapack-netlib/LAPACKE/src/lapacke_sormbr_work.c View File

@@ -73,8 +73,11 @@ lapack_int LAPACKE_sormbr_work( int matrix_layout, char vect, char side,
return (info < 0) ? (info - 1) : info;
}
/* Allocate memory for temporary array(s) */
a_t = (float*)
LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,MIN(nq,k)) );
if( LAPACKE_lsame( vect, 'q' ) ) {
a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,k) );
} else {
a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,nq) );
}
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;


+ 5
- 1
lapack-netlib/LAPACKE/src/lapacke_sormlq_work.c View File

@@ -72,7 +72,11 @@ lapack_int LAPACKE_sormlq_work( int matrix_layout, char side, char trans,
return (info < 0) ? (info - 1) : info;
}
/* Allocate memory for temporary array(s) */
a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,m) );
if( LAPACKE_lsame( side, 'l' ) ) {
a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,m) );
} else {
a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,n) );
}
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;


+ 2
- 4
lapack-netlib/LAPACKE/src/lapacke_zlantr.c View File

@@ -51,8 +51,7 @@ double LAPACKE_zlantr( int matrix_layout, char norm, char uplo, char diag,
}
#endif
/* Allocate memory for working array(s) */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
work = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,MAX(m,n)) );
if( work == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
@@ -63,8 +62,7 @@ double LAPACKE_zlantr( int matrix_layout, char norm, char uplo, char diag,
res = LAPACKE_zlantr_work( matrix_layout, norm, uplo, diag, m, n, a, lda,
work );
/* Release memory and exit */
if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) ||
LAPACKE_lsame( norm, 'O' ) ) {
if( LAPACKE_lsame( norm, 'i' ) ) {
LAPACKE_free( work );
}
exit_level_0:


+ 1
- 1
lapack-netlib/LAPACKE/src/lapacke_zlantr_work.c View File

@@ -39,7 +39,7 @@ double LAPACKE_zlantr_work( int matrix_layout, char norm, char uplo,
double* work )
{
lapack_int info = 0;
double res = 0.;
double res = 0.;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
res = LAPACK_zlantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );


+ 2
- 2
lapack-netlib/SRC/cgeev.f View File

@@ -405,9 +405,9 @@
$ WORK( IWRK ), LWORK-IWRK+1, INFO )
END IF
*
* If INFO > 0 from CHSEQR, then quit
* If INFO .NE. 0 from CHSEQR, then quit
*
IF( INFO.GT.0 )
IF( INFO.NE.0 )
$ GO TO 50
*
IF( WANTVL .OR. WANTVR ) THEN


+ 19
- 1
lapack-netlib/SRC/cgetc2.f View File

@@ -145,15 +145,33 @@
INTRINSIC ABS, CMPLX, MAX
* ..
* .. Executable Statements ..
*
INFO = 0
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Set constants to control overflow
*
INFO = 0
EPS = SLAMCH( 'P' )
SMLNUM = SLAMCH( 'S' ) / EPS
BIGNUM = ONE / SMLNUM
CALL SLABAD( SMLNUM, BIGNUM )
*
* Handle the case N=1 by itself
*
IF( N.EQ.1 ) THEN
IPIV( 1 ) = 1
JPIV( 1 ) = 1
IF( ABS( A( 1, 1 ) ).LT.SMLNUM ) THEN
INFO = 1
A( 1, 1 ) = CMPLX( SMLNUM, ZERO )
END IF
RETURN
END IF
*
* Factorize A using complete pivoting.
* Set pivots less than SMIN to SMIN
*


+ 4
- 4
lapack-netlib/SRC/cggev3.f View File

@@ -339,16 +339,16 @@
$ LDVL, VR, LDVR, WORK, -1, IERR )
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
CALL CHGEQZ( 'S', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB,
$ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK,
$ -1, WORK, IERR )
$ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK, -1,
$ RWORK, IERR )
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
ELSE
CALL CGGHD3( 'N', 'N', N, 1, N, A, LDA, B, LDB, VL, LDVL,
$ VR, LDVR, WORK, -1, IERR )
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
CALL CHGEQZ( 'E', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB,
$ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK,
$ -1, WORK, IERR )
$ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK, -1,
$ RWORK, IERR )
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
END IF
WORK( 1 ) = CMPLX( LWKOPT )


+ 2
- 2
lapack-netlib/SRC/dgeev.f View File

@@ -418,9 +418,9 @@
$ WORK( IWRK ), LWORK-IWRK+1, INFO )
END IF
*
* If INFO > 0 from DHSEQR, then quit
* If INFO .NE. 0 from DHSEQR, then quit
*
IF( INFO.GT.0 )
IF( INFO.NE.0 )
$ GO TO 50
*
IF( WANTVL .OR. WANTVR ) THEN


+ 19
- 1
lapack-netlib/SRC/dgetc2.f View File

@@ -145,15 +145,33 @@
INTRINSIC ABS, MAX
* ..
* .. Executable Statements ..
*
INFO = 0
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Set constants to control overflow
*
INFO = 0
EPS = DLAMCH( 'P' )
SMLNUM = DLAMCH( 'S' ) / EPS
BIGNUM = ONE / SMLNUM
CALL DLABAD( SMLNUM, BIGNUM )
*
* Handle the case N=1 by itself
*
IF( N.EQ.1 ) THEN
IPIV( 1 ) = 1
JPIV( 1 ) = 1
IF( ABS( A( 1, 1 ) ).LT.SMLNUM ) THEN
INFO = 1
A( 1, 1 ) = SMLNUM
END IF
RETURN
END IF
*
* Factorize A using complete pivoting.
* Set pivots less than SMIN to SMIN.
*


+ 2
- 2
lapack-netlib/SRC/sgeev.f View File

@@ -418,9 +418,9 @@
$ WORK( IWRK ), LWORK-IWRK+1, INFO )
END IF
*
* If INFO > 0 from SHSEQR, then quit
* If INFO .NE. 0 from SHSEQR, then quit
*
IF( INFO.GT.0 )
IF( INFO.NE.0 )
$ GO TO 50
*
IF( WANTVL .OR. WANTVR ) THEN


+ 19
- 1
lapack-netlib/SRC/sgetc2.f View File

@@ -145,15 +145,33 @@
INTRINSIC ABS, MAX
* ..
* .. Executable Statements ..
*
INFO = 0
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Set constants to control overflow
*
INFO = 0
EPS = SLAMCH( 'P' )
SMLNUM = SLAMCH( 'S' ) / EPS
BIGNUM = ONE / SMLNUM
CALL SLABAD( SMLNUM, BIGNUM )
*
* Handle the case N=1 by itself
*
IF( N.EQ.1 ) THEN
IPIV( 1 ) = 1
JPIV( 1 ) = 1
IF( ABS( A( 1, 1 ) ).LT.SMLNUM ) THEN
INFO = 1
A( 1, 1 ) = SMLNUM
END IF
RETURN
END IF
*
* Factorize A using complete pivoting.
* Set pivots less than SMIN to SMIN.
*


+ 2
- 2
lapack-netlib/SRC/zgeev.f View File

@@ -404,9 +404,9 @@
$ WORK( IWRK ), LWORK-IWRK+1, INFO )
END IF
*
* If INFO > 0 from ZHSEQR, then quit
* If INFO .NE. 0 from ZHSEQR, then quit
*
IF( INFO.GT.0 )
IF( INFO.NE.0 )
$ GO TO 50
*
IF( WANTVL .OR. WANTVR ) THEN


+ 19
- 1
lapack-netlib/SRC/zgetc2.f View File

@@ -145,15 +145,33 @@
INTRINSIC ABS, DCMPLX, MAX
* ..
* .. Executable Statements ..
*
INFO = 0
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
* Set constants to control overflow
*
INFO = 0
EPS = DLAMCH( 'P' )
SMLNUM = DLAMCH( 'S' ) / EPS
BIGNUM = ONE / SMLNUM
CALL DLABAD( SMLNUM, BIGNUM )
*
* Handle the case N=1 by itself
*
IF( N.EQ.1 ) THEN
IPIV( 1 ) = 1
JPIV( 1 ) = 1
IF( ABS( A( 1, 1 ) ).LT.SMLNUM ) THEN
INFO = 1
A( 1, 1 ) = DCMPLX( SMLNUM, ZERO )
END IF
RETURN
END IF
*
* Factorize A using complete pivoting.
* Set pivots less than SMIN to SMIN
*


+ 2
- 2
lapack-netlib/SRC/zggev3.f View File

@@ -340,7 +340,7 @@
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
CALL ZHGEQZ( 'S', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB,
$ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK, -1,
$ WORK, IERR )
$ RWORK, IERR )
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
ELSE
CALL ZGGHD3( JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, VL,
@@ -348,7 +348,7 @@
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
CALL ZHGEQZ( 'E', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB,
$ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK, -1,
$ WORK, IERR )
$ RWORK, IERR )
LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) )
END IF
WORK( 1 ) = DCMPLX( LWKOPT )


Loading…
Cancel
Save