added benchmark program for lapack ?getri functions (tags/v0.2.11^2)
| @@ -41,6 +41,8 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \ | |||
| cher2k.goto zher2k.goto \ | |||
| sgemv.goto dgemv.goto cgemv.goto zgemv.goto \ | |||
| sgeev.goto dgeev.goto cgeev.goto zgeev.goto \ | |||
| sgetri.goto dgetri.goto cgetri.goto zgetri.goto \ | |||
| spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \ | |||
| ssymm.goto dsymm.goto csymm.goto zsymm.goto | |||
| acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \ | |||
| @@ -57,6 +59,8 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \ | |||
| cher2k.acml zher2k.acml \ | |||
| sgemv.acml dgemv.acml cgemv.acml zgemv.acml \ | |||
| sgeev.acml dgeev.acml cgeev.acml zgeev.acml \ | |||
| sgetri.acml dgetri.acml cgetri.acml zgetri.acml \ | |||
| spotrf.acml dpotrf.acml cpotrf.acml zpotrf.acml \ | |||
| ssymm.acml dsymm.acml csymm.acml zsymm.acml | |||
| atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \ | |||
| @@ -74,6 +78,8 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \ | |||
| cher2k.atlas zher2k.atlas \ | |||
| sgemv.atlas dgemv.atlas cgemv.atlas zgemv.atlas \ | |||
| sgeev.atlas dgeev.atlas cgeev.atlas zgeev.atlas \ | |||
| sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \ | |||
| spotrf.atlas dpotrf.atlas cpotrf.atlas zpotrf.atlas \ | |||
| ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas | |||
| mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \ | |||
| @@ -90,6 +96,8 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \ | |||
| cher2k.mkl zher2k.mkl \ | |||
| sgemv.mkl dgemv.mkl cgemv.mkl zgemv.mkl \ | |||
| sgeev.mkl dgeev.mkl cgeev.mkl zgeev.mkl \ | |||
| sgetri.mkl dgetri.mkl cgetri.mkl zgetri.mkl \ | |||
| spotrf.mkl dpotrf.mkl cpotrf.mkl zpotrf.mkl \ | |||
| ssymm.mkl dsymm.mkl csymm.mkl zsymm.mkl | |||
| all :: goto atlas acml mkl | |||
| @@ -779,6 +787,115 @@ zgeev.mkl : zgeev.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Sgetri: link sgetri.$(SUFFIX) against ../$(LIBNAME) (.goto), $(LIBACML), $(LIBATLAS), $(LIBMKL); recipes starting with '-' may fail without stopping make ##### | |||
| sgetri.goto : sgetri.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| sgetri.acml : sgetri.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| sgetri.atlas : sgetri.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| sgetri.mkl : sgetri.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Dgetri: the same four link rules for dgetri.$(SUFFIX) ##### | |||
| dgetri.goto : dgetri.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| dgetri.acml : dgetri.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dgetri.atlas : dgetri.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dgetri.mkl : dgetri.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Cgetri: the same four link rules for cgetri.$(SUFFIX) ##### | |||
| cgetri.goto : cgetri.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| cgetri.acml : cgetri.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| cgetri.atlas : cgetri.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| cgetri.mkl : cgetri.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Zgetri: the same four link rules for zgetri.$(SUFFIX) ##### | |||
| zgetri.goto : zgetri.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| zgetri.acml : zgetri.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zgetri.atlas : zgetri.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zgetri.mkl : zgetri.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Spotrf: link spotrf.$(SUFFIX) against ../$(LIBNAME) (.goto), $(LIBACML), $(LIBATLAS), $(LIBMKL); recipes starting with '-' may fail without stopping make ##### | |||
| spotrf.goto : spotrf.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| spotrf.acml : spotrf.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| spotrf.atlas : spotrf.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| spotrf.mkl : spotrf.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Dpotrf: the same four link rules for dpotrf.$(SUFFIX) ##### | |||
| dpotrf.goto : dpotrf.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| dpotrf.acml : dpotrf.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dpotrf.atlas : dpotrf.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dpotrf.mkl : dpotrf.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Cpotrf: the same four link rules for cpotrf.$(SUFFIX) ##### | |||
| cpotrf.goto : cpotrf.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| cpotrf.acml : cpotrf.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| cpotrf.atlas : cpotrf.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| cpotrf.mkl : cpotrf.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Zpotrf: the same four link rules for zpotrf.$(SUFFIX) ##### | |||
| zpotrf.goto : zpotrf.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| zpotrf.acml : zpotrf.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zpotrf.atlas : zpotrf.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zpotrf.mkl : zpotrf.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ################################################################################################### | |||
| @@ -932,6 +1049,32 @@ cgeev.$(SUFFIX) : geev.c | |||
| zgeev.$(SUFFIX) : geev.c | |||
| $(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | |||
| sgetri.$(SUFFIX) : getri.c # s object: getri.c compiled with COMPLEX and DOUBLE both undefined | |||
| $(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^ | |||
| dgetri.$(SUFFIX) : getri.c # d object: DOUBLE defined, COMPLEX undefined | |||
| $(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^ | |||
| cgetri.$(SUFFIX) : getri.c # c object: COMPLEX defined, DOUBLE undefined | |||
| $(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^ | |||
| zgetri.$(SUFFIX) : getri.c # z object: COMPLEX and DOUBLE both defined | |||
| $(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | |||
| spotrf.$(SUFFIX) : potrf.c # s object: potrf.c compiled with COMPLEX and DOUBLE both undefined | |||
| $(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^ | |||
| dpotrf.$(SUFFIX) : potrf.c # d object: DOUBLE defined, COMPLEX undefined | |||
| $(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^ | |||
| cpotrf.$(SUFFIX) : potrf.c # c object: COMPLEX defined, DOUBLE undefined | |||
| $(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^ | |||
| zpotrf.$(SUFFIX) : potrf.c # z object: COMPLEX and DOUBLE both defined | |||
| $(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | |||
| clean :: | |||
| @@ -0,0 +1,234 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| /* without modification, are permitted provided that the following */ | |||
| /* conditions are met: */ | |||
| /* */ | |||
| /* 1. Redistributions of source code must retain the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer. */ | |||
| /* */ | |||
| /* 2. Redistributions in binary form must reproduce the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer in the documentation and/or other materials */ | |||
| /* provided with the distribution. */ | |||
| /* */ | |||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||
| /* */ | |||
| /* The views and conclusions contained in the software and */ | |||
| /* documentation are those of the authors and should not be */ | |||
| /* interpreted as representing official policies, either expressed */ | |||
| /* or implied, of The University of Texas at Austin. */ | |||
| /*********************************************************************/ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #ifdef __CYGWIN32__ | |||
| #include <sys/time.h> | |||
| #endif | |||
| #include "common.h" | |||
| #undef GETRF /* discard any earlier GETRF/GETRI macro definitions (presumably from common.h -- confirm) before redefining them below */ | |||
| #undef GETRI | |||
| #ifndef COMPLEX /* COMPLEX undefined: q routines with XDOUBLE, d routines with DOUBLE, s routines otherwise */ | |||
| #ifdef XDOUBLE | |||
| #define GETRF BLASFUNC(qgetrf) | |||
| #define GETRI BLASFUNC(qgetri) | |||
| #elif defined(DOUBLE) | |||
| #define GETRF BLASFUNC(dgetrf) | |||
| #define GETRI BLASFUNC(dgetri) | |||
| #else | |||
| #define GETRF BLASFUNC(sgetrf) | |||
| #define GETRI BLASFUNC(sgetri) | |||
| #endif | |||
| #else /* COMPLEX defined: x routines with XDOUBLE, z routines with DOUBLE, c routines otherwise */ | |||
| #ifdef XDOUBLE | |||
| #define GETRF BLASFUNC(xgetrf) | |||
| #define GETRI BLASFUNC(xgetri) | |||
| #elif defined(DOUBLE) | |||
| #define GETRF BLASFUNC(zgetrf) | |||
| #define GETRI BLASFUNC(zgetri) | |||
| #else | |||
| #define GETRF BLASFUNC(cgetrf) | |||
| #define GETRI BLASFUNC(cgetri) | |||
| #endif | |||
| #endif | |||
| extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info); /* NOTE(review): only GETRI is declared here; GETRF's prototype presumably comes from common.h -- confirm */ | |||
| #if defined(__WIN32__) || defined(__WIN64__) | |||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||
| /* Microseconds subtracted below to move the FILETIME origin to the Unix epoch. */ | |||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||
| #endif | |||
| /* | |||
|  * Minimal gettimeofday() replacement for Windows builds. | |||
|  * | |||
|  * tv : receives the current time as seconds and microseconds; may be NULL, | |||
|  *      in which case nothing is written. | |||
|  * tz : accepted for signature compatibility only and never read. | |||
|  * | |||
|  * Always returns 0. | |||
|  */ | |||
| int gettimeofday(struct timeval *tv, void *tz){ | |||
|   FILETIME ft; | |||
|   unsigned __int64 ticks; | |||
|   (void)tz; | |||
|   if (tv == NULL) return 0; | |||
|   GetSystemTimeAsFileTime(&ft); | |||
|   /* Join the two 32-bit halves of the FILETIME into one 64-bit count. */ | |||
|   ticks = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime; | |||
|   ticks /= 10; /* 100 ns units -> microseconds */ | |||
|   ticks -= DELTA_EPOCH_IN_MICROSECS; /* rebase onto the Unix epoch */ | |||
|   tv->tv_sec = (long)(ticks / 1000000UL); | |||
|   tv->tv_usec = (long)(ticks % 1000000UL); | |||
|   return 0; | |||
| } | |||
| #endif | |||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 /* the trailing "&& 0" makes this condition always false, so huge_malloc and the malloc override below are never compiled */ | |||
| static void *huge_malloc(BLASLONG size){ /* obtain `size` bytes from a private shared-memory segment requested with SHM_HUGETLB; prints a message and exits on failure */ | |||
| int shmid; | |||
| void *address; | |||
| #ifndef SHM_HUGETLB | |||
| #define SHM_HUGETLB 04000 /* fallback used when the included headers do not define SHM_HUGETLB */ | |||
| #endif | |||
| if ((shmid =shmget(IPC_PRIVATE, | |||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), /* next HUGE_PAGESIZE multiple strictly above size (assumes HUGE_PAGESIZE is a power of two -- TODO confirm) */ | |||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||
| printf( "Memory allocation failed(shmget).\n"); | |||
| exit(1); | |||
| } | |||
| address = shmat(shmid, NULL, SHM_RND); | |||
| if ((BLASLONG)address == -1){ /* shmat reports failure as (void *)-1 */ | |||
| printf( "Memory allocation failed(shmat).\n"); | |||
| exit(1); | |||
| } | |||
| shmctl(shmid, IPC_RMID, 0); /* mark the segment for removal right after attaching it */ | |||
| return address; | |||
| } | |||
| #define malloc huge_malloc /* would route the malloc() calls that follow in this file to huge_malloc */ | |||
| #endif | |||
| /* | |||
|  * Benchmark driver for GETRI (matrix inverse computed from a GETRF factorization). | |||
|  * | |||
|  * Usage: <program> [from [to [step]]] | |||
|  *   from : first matrix order (default 1) | |||
|  *   to   : last matrix order (default 200); when given, it is raised to `from` if smaller | |||
|  *   step : increment between orders (default 1); values below 1 are treated as 1 | |||
|  * | |||
|  * For every order m the buffer is factorized with GETRF and the workspace | |||
|  * size is queried, both untimed; only the GETRI call itself is timed. | |||
|  * All output goes to stderr. Exits with status 1 on allocation failure or | |||
|  * when GETRF/GETRI return a non-zero info; returns 0 otherwise. | |||
|  */ | |||
| int MAIN__(int argc, char *argv[]){ | |||
|   FLOAT *a, *work; | |||
|   FLOAT wkopt[4]; | |||
|   blasint *ipiv; | |||
|   blasint m, i, j, info, lwork; | |||
|   int from = 1; | |||
|   int to = 200; | |||
|   int step = 1; | |||
|   struct timeval start, stop; | |||
|   double time1; | |||
|   argc--; argv++; | |||
|   if (argc > 0) { from = atol(*argv); argc--; argv++; } | |||
|   if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; } | |||
|   if (argc > 0) { step = atol(*argv); argc--; argv++; } | |||
|   /* A zero step would never advance m; a negative one would walk m below 1. */ | |||
|   if (step < 1) step = 1; | |||
|   fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step); | |||
|   if ((a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) { | |||
|     fprintf(stderr, "Out of Memory!!\n"); exit(1); | |||
|   } | |||
|   /* One pivot index per matrix row, so the count does not scale with COMPSIZE. */ | |||
|   if ((ipiv = (blasint *)malloc(sizeof(blasint) * to)) == NULL) { | |||
|     fprintf(stderr, "Out of Memory!!\n"); exit(1); | |||
|   } | |||
|   /* Fill the whole to x to buffer with values in [-0.5, 0.5]. */ | |||
|   /* NOTE(review): the buffer is filled only once; later orders factorize whatever the */ | |||
|   /* previous GETRI left behind, re-read with a different leading dimension. */ | |||
|   for (j = 0; j < to; j++) { | |||
|     for (i = 0; i < to * COMPSIZE; i++) { | |||
|       a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
|     } | |||
|   } | |||
|   /* Workspace query (lwork = -1) at the largest order; the result sizes `work` for the run. */ | |||
|   /* NOTE(review): assumes the size reported for smaller orders never exceeds this one. */ | |||
|   lwork = -1; | |||
|   m = to; | |||
|   GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info); | |||
|   if (info) { | |||
|     fprintf(stderr, "workspace query failed .. %d\n", (int)info); | |||
|     exit(1); | |||
|   } | |||
|   lwork = (blasint)wkopt[0]; | |||
|   if ((work = (FLOAT *)malloc(sizeof(FLOAT) * lwork * COMPSIZE)) == NULL) { | |||
|     fprintf(stderr, "Out of Memory!!\n"); exit(1); | |||
|   } | |||
| #ifdef linux | |||
|   /* Seeds random(); the fill above used rand() and has already run. */ | |||
|   srandom(getpid()); | |||
| #endif | |||
|   fprintf(stderr, " SIZE FLops Time Lwork\n"); | |||
|   for (m = from; m <= to; m += step) { | |||
|     fprintf(stderr, " %6d : ", (int)m); | |||
|     GETRF(&m, &m, a, &m, ipiv, &info); | |||
|     if (info) { | |||
|       fprintf(stderr, "GETRF failed: matrix is singular (info > 0) or bad argument (info < 0) .. %d\n", (int)info); | |||
|       exit(1); | |||
|     } | |||
|     /* Ask for this order's workspace size before starting the clock. */ | |||
|     lwork = -1; | |||
|     GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info); | |||
|     lwork = (blasint)wkopt[0]; | |||
|     gettimeofday(&start, (struct timezone *)0); | |||
|     GETRI(&m, a, &m, ipiv, work, &lwork, &info); | |||
|     gettimeofday(&stop, (struct timezone *)0); | |||
|     if (info) { | |||
|       fprintf(stderr, "failed compute inverse matrix .. %d\n", (int)info); | |||
|       exit(1); | |||
|     } | |||
|     time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
|     /* blasint values are cast to int to match the %d conversions. */ | |||
|     fprintf(stderr, | |||
|             " %10.2f MFlops : %10.2f Sec : %d\n", | |||
|             COMPSIZE * COMPSIZE * (4.0/3.0 * (double)m * (double)m * (double)m - (double)m * (double)m + 5.0/3.0 * (double)m) / time1 * 1.e-6, time1, (int)lwork); | |||
|   } | |||
|   return 0; | |||
| } | |||
| /* main is a weak alias of MAIN__; declared int to match the aliased function. */ | |||
| int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||
| @@ -0,0 +1,282 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| /* without modification, are permitted provided that the following */ | |||
| /* conditions are met: */ | |||
| /* */ | |||
| /* 1. Redistributions of source code must retain the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer. */ | |||
| /* */ | |||
| /* 2. Redistributions in binary form must reproduce the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer in the documentation and/or other materials */ | |||
| /* provided with the distribution. */ | |||
| /* */ | |||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||
| /* */ | |||
| /* The views and conclusions contained in the software and */ | |||
| /* documentation are those of the authors and should not be */ | |||
| /* interpreted as representing official policies, either expressed */ | |||
| /* or implied, of The University of Texas at Austin. */ | |||
| /*********************************************************************/ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #ifdef __CYGWIN32__ | |||
| #include <sys/time.h> | |||
| #endif | |||
| #include "common.h" | |||
| double fabs(double); | |||
| /* Discard earlier definitions of every macro redefined below, not only POTRF, */ | |||
| /* so a prior definition (presumably from common.h -- confirm) cannot clash. */ | |||
| #undef POTRF | |||
| #undef POTRS | |||
| #undef POTRI | |||
| #undef SYRK | |||
| /* COMPLEX undefined: q routines with XDOUBLE, d routines with DOUBLE, s routines otherwise. */ | |||
| #ifndef COMPLEX | |||
| #ifdef XDOUBLE | |||
| #define POTRF BLASFUNC(qpotrf) | |||
| #define POTRS BLASFUNC(qpotrs) | |||
| #define POTRI BLASFUNC(qpotri) | |||
| #define SYRK BLASFUNC(qsyrk) | |||
| #elif defined(DOUBLE) | |||
| #define POTRF BLASFUNC(dpotrf) | |||
| #define POTRS BLASFUNC(dpotrs) | |||
| #define POTRI BLASFUNC(dpotri) | |||
| #define SYRK BLASFUNC(dsyrk) | |||
| #else | |||
| #define POTRF BLASFUNC(spotrf) | |||
| #define POTRS BLASFUNC(spotrs) | |||
| #define POTRI BLASFUNC(spotri) | |||
| #define SYRK BLASFUNC(ssyrk) | |||
| #endif | |||
| #else | |||
| /* COMPLEX defined: x/z/c routines; SYRK maps to the ?herk routine here. */ | |||
| #ifdef XDOUBLE | |||
| #define POTRF BLASFUNC(xpotrf) | |||
| #define POTRS BLASFUNC(xpotrs) | |||
| #define POTRI BLASFUNC(xpotri) | |||
| #define SYRK BLASFUNC(xherk) | |||
| #elif defined(DOUBLE) | |||
| #define POTRF BLASFUNC(zpotrf) | |||
| #define POTRS BLASFUNC(zpotrs) | |||
| #define POTRI BLASFUNC(zpotri) | |||
| #define SYRK BLASFUNC(zherk) | |||
| #else | |||
| #define POTRF BLASFUNC(cpotrf) | |||
| #define POTRS BLASFUNC(cpotrs) | |||
| #define POTRI BLASFUNC(cpotri) | |||
| #define SYRK BLASFUNC(cherk) | |||
| #endif | |||
| #endif | |||
| /* NOTE(review): the POTRI/POTRS prototypes below are disabled; presumably common.h declares them -- confirm. */ | |||
| // extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info); | |||
| // extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info); | |||
| #if defined(__WIN32__) || defined(__WIN64__) | |||
| /* Defined here only when no earlier header supplied it, so the shim never depends on one. */ | |||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||
| #endif | |||
| /* | |||
|  * Minimal gettimeofday() replacement for Windows builds. | |||
|  * | |||
|  * tv : receives the current time as seconds and microseconds; may be NULL, | |||
|  *      in which case nothing is written. | |||
|  * tz : accepted for signature compatibility only and never read. | |||
|  * | |||
|  * Always returns 0. | |||
|  */ | |||
| int gettimeofday(struct timeval *tv, void *tz){ | |||
|   FILETIME ft; | |||
|   unsigned __int64 ticks; | |||
|   (void)tz; | |||
|   if (tv == NULL) return 0; | |||
|   GetSystemTimeAsFileTime(&ft); | |||
|   /* Join the two 32-bit halves of the FILETIME into one 64-bit count. */ | |||
|   ticks = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime; | |||
|   ticks /= 10; /* 100 ns units -> microseconds */ | |||
|   ticks -= DELTA_EPOCH_IN_MICROSECS; /* rebase onto the Unix epoch */ | |||
|   tv->tv_sec = (long)(ticks / 1000000UL); | |||
|   tv->tv_usec = (long)(ticks % 1000000UL); | |||
|   return 0; | |||
| } | |||
| #endif | |||
| /* | |||
|  * Fill the m x m column-major matrix `a` (leading dimension m) as a | |||
|  * triangular matrix: the diagonal gets a random real value in [8, 9], the | |||
|  * stored triangle gets random values in [-0.5, 0.5] and the other triangle | |||
|  * is zeroed. `lower` non-zero selects the lower triangle, zero the upper. | |||
|  * In COMPLEX builds each element is a (real, imaginary) pair and the | |||
|  * diagonal's imaginary part is zero. | |||
|  */ | |||
| static void potrf_fill_triangular(FLOAT *a, blasint m, int lower){ | |||
|   blasint i, j; | |||
|   for (j = 0; j < m; j++) { | |||
|     for (i = 0; i < m; i++) { | |||
|       int stored = lower ? (i > j) : (i < j); | |||
| #ifndef COMPLEX | |||
|       FLOAT *e = a + (i + j * m); | |||
|       if (i == j) { | |||
|         e[0] = ((double) rand() / (double) RAND_MAX) + 8.; | |||
|       } else if (stored) { | |||
|         e[0] = ((double) rand() / (double) RAND_MAX) - 0.5; | |||
|       } else { | |||
|         e[0] = 0.; | |||
|       } | |||
| #else | |||
|       FLOAT *e = a + (i + j * m) * 2; | |||
|       if (i == j) { | |||
|         e[0] = ((double) rand() / (double) RAND_MAX) + 8.; | |||
|         e[1] = 0.; | |||
|       } else if (stored) { | |||
|         e[0] = ((double) rand() / (double) RAND_MAX) - 0.5; | |||
|         e[1] = ((double) rand() / (double) RAND_MAX) - 0.5; | |||
|       } else { | |||
|         e[0] = 0.; | |||
|         e[1] = 0.; | |||
|       } | |||
| #endif | |||
|     } | |||
|   } | |||
| } | |||
| /* | |||
|  * Benchmark driver for POTRF, optionally followed by POTRS or POTRI. | |||
|  * | |||
|  * Usage: <program> [from [to [step]]] | |||
|  *   from : first matrix order (default 1) | |||
|  *   to   : last matrix order (default 200); when given, it is raised to `from` if smaller | |||
|  *   step : increment between orders (default 1); values below 1 are treated as 1 | |||
|  * Environment: | |||
|  *   OPENBLAS_UPLO : a value starting with 'L' selects the lower triangle (default upper) | |||
|  *   OPENBLAS_TEST : first character selects the reported test; 'S' times POTRS, | |||
|  *                   'I' times POTRI, anything else reports the POTRF timing | |||
|  * | |||
|  * For every order m a triangular matrix is generated in `a`, SYRK forms the | |||
|  * input matrix in `b`, and POTRF on `b` is timed. All output goes to stderr. | |||
|  * Exits with status 1 on allocation failure or a non-zero info; returns 0 otherwise. | |||
|  */ | |||
| int MAIN__(int argc, char *argv[]){ | |||
| #ifndef COMPLEX | |||
|   char *trans[] = {"T", "N"}; | |||
| #else | |||
|   char *trans[] = {"C", "N"}; | |||
| #endif | |||
|   char *uplo[] = {"U", "L"}; | |||
|   FLOAT alpha[] = {1.0, 0.0}; | |||
|   FLOAT beta [] = {0.0, 0.0}; | |||
|   FLOAT *a, *b; | |||
|   char *p; | |||
|   char btest = 'F'; | |||
|   blasint m, i, j, info, uplos = 0; | |||
|   double flops; | |||
|   int from = 1; | |||
|   int to = 200; | |||
|   int step = 1; | |||
|   struct timeval start, stop; | |||
|   double time1; | |||
|   argc--; argv++; | |||
|   if (argc > 0) { from = atol(*argv); argc--; argv++; } | |||
|   if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; } | |||
|   if (argc > 0) { step = atol(*argv); argc--; argv++; } | |||
|   /* A zero step would never advance m; a negative one would walk m below 1. */ | |||
|   if (step < 1) step = 1; | |||
|   if ((p = getenv("OPENBLAS_UPLO"))) | |||
|     if (*p == 'L') uplos = 1; | |||
|   if ((p = getenv("OPENBLAS_TEST"))) btest = *p; | |||
|   fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c\n", from, to, step, *uplo[uplos]); | |||
|   if ((a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) { | |||
|     fprintf(stderr, "Out of Memory!!\n"); exit(1); | |||
|   } | |||
|   if ((b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) { | |||
|     fprintf(stderr, "Out of Memory!!\n"); exit(1); | |||
|   } | |||
|   for (m = from; m <= to; m += step) { | |||
|     /* Triangular factor with a dominant diagonal; SYRK then writes its product into b. */ | |||
|     potrf_fill_triangular(a, m, uplos & 1); | |||
|     SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m); | |||
|     gettimeofday(&start, (struct timezone *)0); | |||
|     POTRF(uplo[uplos], &m, b, &m, &info); | |||
|     gettimeofday(&stop, (struct timezone *)0); | |||
|     if (info != 0) { | |||
|       fprintf(stderr, "Potrf info = %d\n", (int)info); | |||
|       exit(1); | |||
|     } | |||
|     time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
|     flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m * (double)m + 1.0/2.0 * (double)m * (double)m + 1.0/6.0 * (double)m) / time1 * 1.e-6; | |||
|     if (btest == 'S') { | |||
|       /* Reuse `a` as the right-hand-side matrix: refill it with random values. */ | |||
|       for (j = 0; j < to; j++) { | |||
|         for (i = 0; i < to * COMPSIZE; i++) { | |||
|           a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
|         } | |||
|       } | |||
|       gettimeofday(&start, (struct timezone *)0); | |||
|       POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info); | |||
|       gettimeofday(&stop, (struct timezone *)0); | |||
|       if (info != 0) { | |||
|         fprintf(stderr, "Potrs info = %d\n", (int)info); | |||
|         exit(1); | |||
|       } | |||
|       /* The POTRS figures replace the POTRF ones in the report line. */ | |||
|       time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
|       flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m * (double)m) / time1 * 1.e-6; | |||
|     } | |||
|     if (btest == 'I') { | |||
|       gettimeofday(&start, (struct timezone *)0); | |||
|       POTRI(uplo[uplos], &m, b, &m, &info); | |||
|       gettimeofday(&stop, (struct timezone *)0); | |||
|       if (info != 0) { | |||
|         fprintf(stderr, "Potri info = %d\n", (int)info); | |||
|         exit(1); | |||
|       } | |||
|       /* The POTRI figures replace the POTRF ones in the report line. */ | |||
|       time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
|       flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m * (double)m + 1.0/2.0 * (double)m * (double)m + 5.0/6.0 * (double)m) / time1 * 1.e-6; | |||
|     } | |||
|     /* blasint m is cast to int to match the %d conversion. */ | |||
|     fprintf(stderr, "%8d : %10.2f MFlops : %10.3f Sec : Test=%c\n", (int)m, flops, time1, btest); | |||
|   } | |||
|   return 0; | |||
| } | |||
| /* main is a weak alias of MAIN__; declared int to match the aliased function. */ | |||
| int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||
| @@ -524,10 +524,21 @@ static __inline void blas_unlock(volatile BLASULONG *address){ | |||
| *address = 0; | |||
| } | |||
| #ifdef OS_WINDOWS /* readenv_atoi(env): integer value of the environment variable named by env; one implementation per platform follows */ | |||
| static __inline int readenv_atoi(char *env) { | |||
| env_var_t p; | |||
| return readenv(p,env) ? 0 : atoi(p); /* NOTE(review): yields 0 when readenv() is non-zero and parses p only when it is zero -- confirm readenv()'s return convention, which is defined outside this view */ | |||
| } | |||
| #else | |||
| static __inline int readenv_atoi(char *env) { /* atoi() of getenv(env), or 0 when getenv returns NULL */ | |||
| char *p; | |||
| if (( p = getenv(env) )) | |||
| return (atoi(p)); | |||
| else | |||
| return(0); | |||
| } | |||
| #endif | |||
| #if !defined(XDOUBLE) || !defined(QUAD_PRECISION) | |||