| @@ -1,4 +1,24 @@ | |||
| OpenBLAS ChangeLog | |||
| ==================================================================== | |||
| Version 0.2.10 | |||
| 16-Jul-2014 | |||
| common: | |||
| * Added the following BLAS extensions: | |||
| s/d/c/zaxpby, s/d/c/zimatcopy, s/d/c/zomatcopy. | |||
| * Added OPENBLAS_CORETYPE environment for dynamic_arch. (a86d34) | |||
| * Added NO_AVX2 flag for old binutils. (#401) | |||
| * Support outputting the CPU core name at runtime. (#407) | |||
| * Patched LAPACK to fix bugs 114, 117, and 118. | |||
| (http://www.netlib.org/lapack/bug_list.html) | |||
| * Disabled ?gemm3m for a work-around fix. (#400) | |||
| x86/x86-64: | |||
| * Fixed many bugs in the optimized kernels for Sandy Bridge, Haswell, | |||
| Bulldozer, and Piledriver. | |||
| https://github.com/xianyi/OpenBLAS/wiki/Fixed-optimized-kernels-To-do-List | |||
| ARM: | |||
| * Improved LAPACK testing. | |||
| ==================================================================== | |||
| Version 0.2.9 | |||
| 10-Jun-2014 | |||
| @@ -3,7 +3,7 @@ | |||
| # | |||
| # This library's version | |||
| VERSION = 0.2.10.rc2 | |||
| VERSION = 0.2.10 | |||
| # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | |||
| # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | |||
| @@ -25,9 +25,20 @@ VERSION = 0.2.10.rc2 | |||
| # FC = gfortran | |||
| # You can even specify a cross compiler. In that case, please also set HOSTCC. | |||
| # cross compiler for Windows | |||
| # CC = x86_64-w64-mingw32-gcc | |||
| # FC = x86_64-w64-mingw32-gfortran | |||
| # cross compiler for 32bit ARM | |||
| # CC = arm-linux-gnueabihf-gcc | |||
| # FC = arm-linux-gnueabihf-gfortran | |||
| # cross compiler for 64bit ARM | |||
| # CC = aarch64-linux-gnu-gcc | |||
| # FC = aarch64-linux-gnu-gfortran | |||
| # If you use a cross compiler, please set the host compiler here. | |||
| # HOSTCC = gcc | |||
| @@ -88,6 +99,9 @@ NO_AFFINITY = 1 | |||
| # and OS. However, the performance is low. | |||
| # NO_AVX = 1 | |||
| # Don't use Haswell optimizations if binutils is too old (e.g. RHEL6) | |||
| # NO_AVX2 = 1 | |||
| # Don't use parallel make. | |||
| # NO_PARALLEL_MAKE = 1 | |||
| @@ -109,6 +109,10 @@ ifeq ($(BINARY), 32) | |||
| GETARCH_FLAGS += -DNO_AVX | |||
| endif | |||
| # When NO_AVX2 is set to 1, append -DNO_AVX2 to GETARCH_FLAGS. | |||
| ifeq ($(NO_AVX2), 1) | |||
| GETARCH_FLAGS += -DNO_AVX2 | |||
| endif | |||
| ifeq ($(DEBUG), 1) | |||
| GETARCH_FLAGS += -g | |||
| endif | |||
| @@ -385,7 +389,10 @@ endif | |||
| # For ARCH == x86_64, DYNAMIC_CORE starts from the core list assigned below | |||
| # and is extended depending on NO_AVX and NO_AVX2. | |||
| ifeq ($(ARCH), x86_64) | |||
| DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||
| ifneq ($(NO_AVX), 1) | |||
| # NOTE(review): the next two lines both append to DYNAMIC_CORE and the first | |||
| # one already contains HASWELL; they look like the before/after sides of a | |||
| # diff. Confirm which one is meant to remain. | |||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL | |||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||
| endif | |||
| # HASWELL is appended unless NO_AVX2 is 1. | |||
| # NOTE(review): this test does not consult NO_AVX, so NO_AVX=1 alone does not | |||
| # exclude HASWELL here -- confirm that this is intended. | |||
| ifneq ($(NO_AVX2), 1) | |||
| DYNAMIC_CORE += HASWELL | |||
| endif | |||
| endif | |||
| @@ -777,6 +784,10 @@ ifeq ($(BINARY), 32) | |||
| CCOMMON_OPT += -DNO_AVX | |||
| endif | |||
| # When NO_AVX2 is set to 1, append -DNO_AVX2 to CCOMMON_OPT. | |||
| ifeq ($(NO_AVX2), 1) | |||
| CCOMMON_OPT += -DNO_AVX2 | |||
| endif | |||
| ifdef SMP | |||
| CCOMMON_OPT += -DSMP_SERVER | |||
| @@ -1,157 +1,607 @@ | |||
| TOPDIR = .. | |||
| include $(TOPDIR)/Makefile.system | |||
| # Include path and link line for CULA, hard-coded to /usr/local/cula. | |||
| # CULA_LIB also embeds an rpath to the lib64 directory and links cublas. | |||
| CULA_INC = -I/usr/local/cula/include | |||
| CULA_LIB = -L/usr/local/cula/lib64 -Wl,-rpath,/usr/local/cula/lib64 -lcula_fortran -lcula -lcublas | |||
| # Double-colon `all` rule: requires the dlinpack and dcholesky binaries for the | |||
| # goto, mkl and acml variants, then runs each one with the arguments | |||
| # "4000 4000 1". The mkl and acml runs carry a leading '-' so that make | |||
| # continues when one of them fails; the goto runs must succeed. | |||
| all :: dlinpack.goto dlinpack.mkl dlinpack.acml dcholesky.goto dcholesky.mkl dcholesky.acml | |||
| ./dlinpack.goto 4000 4000 1 | |||
| -./dlinpack.mkl 4000 4000 1 | |||
| -./dlinpack.acml 4000 4000 1 | |||
| ./dcholesky.goto 4000 4000 1 | |||
| -./dcholesky.mkl 4000 4000 1 | |||
| -./dcholesky.acml 4000 4000 1 | |||
| # Install locations and link lines for the external libraries (ACML, ATLAS, | |||
| # MKL). LIBACML, LIBATLAS and LIBMKL are referenced by the *.acml, *.atlas and | |||
| # *.mkl link rules in this file. One variant per library is active; the | |||
| # alternatives are kept commented out and must be swapped by hand. | |||
| # ACML standard | |||
| ACML=/opt/acml5.3.1/gfortran64_mp/lib | |||
| LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm | |||
| # ACML custom | |||
| #ACML=/opt/pb/acml-5-3-1-gfortran-64bit/gfortran64_fma4_mp/lib | |||
| #LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm | |||
| # Atlas Ubuntu | |||
| #ATLAS=/usr/lib/atlas-base | |||
| #LIBATLAS = -fopenmp $(ATLAS)/liblapack_atlas.a $(ATLAS)/libptcblas.a $(ATLAS)/libptf77blas.a $(ATLAS)/libatlas.a -lgfortran -lm | |||
| # Atlas RHEL and Fedora | |||
| ATLAS=/usr/lib64/atlas | |||
| LIBATLAS = -fopenmp $(ATLAS)/liblapack.a $(ATLAS)/libptcblas.a $(ATLAS)/libptf77blas.a $(ATLAS)/libatlas.a -lgfortran -lm | |||
| # Intel standard | |||
| MKL=/opt/intel/mkl/lib/intel64 | |||
| LIBMKL = -L$(MKL) -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm | |||
| # Intel custom | |||
| #MKL=/home/saar/intel_mkl | |||
| #LIBMKL = -L$(MKL) -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm | |||
| # Aggregate target with no recipe of its own: depends on the .goto binary of | |||
| # every benchmark listed here (linpack, cholesky, gemm, trmm, trsm, syrk, | |||
| # syr2k, hemm, herk, her2k, symm). | |||
| goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \ | |||
| scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \ | |||
| sgemm.goto dgemm.goto cgemm.goto zgemm.goto \ | |||
| strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \ | |||
| strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \ | |||
| ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \ | |||
| ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \ | |||
| chemm.goto zhemm.goto \ | |||
| cherk.goto zherk.goto \ | |||
| cher2k.goto zher2k.goto \ | |||
| ssymm.goto dsymm.goto csymm.goto zsymm.goto | |||
| # Aggregate target with no recipe of its own: depends on the .acml binary of | |||
| # every benchmark listed here. | |||
| acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \ | |||
| scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \ | |||
| sgemm.acml dgemm.acml cgemm.acml zgemm.acml \ | |||
| strmm.acml dtrmm.acml ctrmm.acml ztrmm.acml \ | |||
| strsm.acml dtrsm.acml ctrsm.acml ztrsm.acml \ | |||
| ssyrk.acml dsyrk.acml csyrk.acml zsyrk.acml \ | |||
| ssyr2k.acml dsyr2k.acml csyr2k.acml zsyr2k.acml \ | |||
| chemm.acml zhemm.acml \ | |||
| cherk.acml zherk.acml \ | |||
| cher2k.acml zher2k.acml \ | |||
| ssymm.acml dsymm.acml csymm.acml zsymm.acml | |||
| # Aggregate target with no recipe of its own: depends on the .atlas binary of | |||
| # every benchmark listed here. | |||
| atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \ | |||
| scholesky.atlas dcholesky.atlas ccholesky.atlas zcholesky.atlas \ | |||
| sgemm.atlas dgemm.atlas cgemm.atlas zgemm.atlas \ | |||
| strmm.atlas dtrmm.atlas ctrmm.atlas ztrmm.atlas \ | |||
| strsm.atlas dtrsm.atlas ctrsm.atlas ztrsm.atlas \ | |||
| ssyrk.atlas dsyrk.atlas csyrk.atlas zsyrk.atlas \ | |||
| ssyr2k.atlas dsyr2k.atlas csyr2k.atlas zsyr2k.atlas \ | |||
| chemm.atlas zhemm.atlas \ | |||
| cherk.atlas zherk.atlas \ | |||
| cher2k.atlas zher2k.atlas \ | |||
| ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas | |||
| # Aggregate target with no recipe of its own: depends on the .mkl binary of | |||
| # every benchmark listed here. | |||
| mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \ | |||
| scholesky.mkl dcholesky.mkl ccholesky.mkl zcholesky.mkl \ | |||
| sgemm.mkl dgemm.mkl cgemm.mkl zgemm.mkl \ | |||
| strmm.mkl dtrmm.mkl ctrmm.mkl ztrmm.mkl \ | |||
| strsm.mkl dtrsm.mkl ctrsm.mkl ztrsm.mkl \ | |||
| ssyrk.mkl dsyrk.mkl csyrk.mkl zsyrk.mkl \ | |||
| ssyr2k.mkl dsyr2k.mkl csyr2k.mkl zsyr2k.mkl \ | |||
| chemm.mkl zhemm.mkl \ | |||
| cherk.mkl zherk.mkl \ | |||
| cher2k.mkl zher2k.mkl \ | |||
| ssymm.mkl dsymm.mkl csymm.mkl zsymm.mkl | |||
| # Double-colon `all` rule with no recipe: depends on the four per-library | |||
| # aggregate targets (goto, atlas, acml, mkl). | |||
| all :: goto atlas acml mkl | |||
| ##################################### Slinpack #################################################### | |||
| # Link slinpack.$(SUFFIX) once per library. The .goto binary links against | |||
| # ../$(LIBNAME) and must build. The .acml/.atlas/.mkl binaries link against | |||
| # $(LIBACML)/$(LIBATLAS)/$(LIBMKL) and carry a leading '-' so a failed link | |||
| # is ignored by make. | |||
| slinpack.goto : slinpack.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| slinpack.acml : slinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| slinpack.atlas : slinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| slinpack.mkl : slinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Dlinpack #################################################### | |||
| # Link dlinpack.$(SUFFIX) once per library. The .goto binary links against | |||
| # ../$(LIBNAME) and must build. The .acml/.atlas/.mkl binaries link against | |||
| # $(LIBACML)/$(LIBATLAS)/$(LIBMKL) and carry a leading '-' so a failed link | |||
| # is ignored by make. qlinpack.goto uses the same recipe as dlinpack.goto | |||
| # and has no acml/atlas/mkl counterpart here. | |||
| dlinpack.goto : dlinpack.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| qlinpack.goto : qlinpack.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| dlinpack.acml : dlinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dlinpack.atlas : dlinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dlinpack.mkl : dlinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Clinpack #################################################### | |||
| # Link clinpack.$(SUFFIX) once per library. The .goto binary links against | |||
| # ../$(LIBNAME) and must build. The .acml/.atlas/.mkl binaries link against | |||
| # $(LIBACML)/$(LIBATLAS)/$(LIBMKL) and carry a leading '-' so a failed link | |||
| # is ignored by make. | |||
| clinpack.goto : clinpack.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| clinpack.acml : clinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| clinpack.atlas : clinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| clinpack.mkl : clinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Zlinpack #################################################### | |||
| # Link zlinpack.$(SUFFIX) once per library. The .goto binary links against | |||
| # ../$(LIBNAME) and must build. The .acml/.atlas/.mkl binaries link against | |||
| # $(LIBACML)/$(LIBATLAS)/$(LIBMKL) and carry a leading '-' so a failed link | |||
| # is ignored by make. xlinpack.goto uses the same recipe as zlinpack.goto | |||
| # and has no acml/atlas/mkl counterpart here. | |||
| zlinpack.goto : zlinpack.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| xlinpack.goto : xlinpack.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| zlinpack.acml : zlinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zlinpack.atlas : zlinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zlinpack.mkl : zlinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Scholesky ################################################### | |||
| # Link scholesky.$(SUFFIX) once per library. The .goto binary links against | |||
| # ../$(LIBNAME) and must build. The .acml/.atlas/.mkl binaries link against | |||
| # $(LIBACML)/$(LIBATLAS)/$(LIBMKL) and carry a leading '-' so a failed link | |||
| # is ignored by make. | |||
| scholesky.goto : scholesky.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| scholesky.acml : scholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| scholesky.atlas : scholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| scholesky.mkl : scholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Dcholesky ################################################### | |||
| # Link dcholesky.$(SUFFIX) once per library. The .goto binary links against | |||
| # ../$(LIBNAME) and must build. The .acml/.atlas/.mkl binaries link against | |||
| # $(LIBACML)/$(LIBATLAS)/$(LIBMKL) and carry a leading '-' so a failed link | |||
| # is ignored by make. qcholesky.goto uses the same recipe as dcholesky.goto | |||
| # and has no acml/atlas/mkl counterpart here. | |||
| dcholesky.goto : dcholesky.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| qcholesky.goto : qcholesky.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| dcholesky.acml : dcholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dcholesky.atlas : dcholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dcholesky.mkl : dcholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Ccholesky ################################################### | |||
| # Link ccholesky.$(SUFFIX) once per library. The .goto binary links against | |||
| # ../$(LIBNAME) and must build. The .acml/.atlas/.mkl binaries link against | |||
| # $(LIBACML)/$(LIBATLAS)/$(LIBMKL) and carry a leading '-' so a failed link | |||
| # is ignored by make. | |||
| ccholesky.goto : ccholesky.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| ccholesky.acml : ccholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ccholesky.atlas : ccholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ccholesky.mkl : ccholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zcholesky.goto : zcholesky.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| ##################################### Zcholesky ################################################### | |||
| xcholesky.goto : xcholesky.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| slinpack.mkl : slinpack.$(SUFFIX) | |||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zcholesky.acml : zcholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dlinpack.mkl : dlinpack.$(SUFFIX) | |||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zcholesky.atlas : zcholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| clinpack.mkl : clinpack.$(SUFFIX) | |||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zcholesky.mkl : zcholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zlinpack.mkl : zlinpack.$(SUFFIX) | |||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| scholesky.mkl : scholesky.$(SUFFIX) | |||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Sgemm #################################################### | |||
| sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| dcholesky.mkl : dcholesky.$(SUFFIX) | |||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| sgemm.acml : sgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ccholesky.mkl : ccholesky.$(SUFFIX) | |||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| sgemm.atlas : sgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zcholesky.mkl : zcholesky.$(SUFFIX) | |||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| sgemm.mkl : sgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| slinpack.acml : slinpack.$(SUFFIX) | |||
| ##################################### Dgemm #################################################### | |||
| dgemm.goto : dgemm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| dgemm.acml : dgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dlinpack.acml : dlinpack.$(SUFFIX) | |||
| dgemm.atlas : dgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dgemm.mkl : dgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Cgemm #################################################### | |||
| cgemm.goto : cgemm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| cgemm.acml : cgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| clinpack.acml : clinpack.$(SUFFIX) | |||
| cgemm.atlas : cgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| cgemm.mkl : cgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Zgemm #################################################### | |||
| zgemm.goto : zgemm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| zgemm.acml : zgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zlinpack.acml : zlinpack.$(SUFFIX) | |||
| zgemm.atlas : zgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zgemm.mkl : zgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Ssymm #################################################### | |||
| ssymm.goto : ssymm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| ssymm.acml : ssymm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| scholesky.acml : scholesky.$(SUFFIX) | |||
| ssymm.atlas : ssymm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ssymm.mkl : ssymm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Dsymm #################################################### | |||
| dsymm.goto : dsymm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| dsymm.acml : dsymm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dcholesky.acml : dcholesky.$(SUFFIX) | |||
| dsymm.atlas : dsymm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dsymm.mkl : dsymm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Csymm #################################################### | |||
| csymm.goto : csymm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| csymm.acml : csymm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ccholesky.acml : ccholesky.$(SUFFIX) | |||
| csymm.atlas : csymm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| csymm.mkl : csymm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Zsymm #################################################### | |||
| zsymm.goto : zsymm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| zsymm.acml : zsymm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zcholesky.acml : zcholesky.$(SUFFIX) | |||
| zsymm.atlas : zsymm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zsymm.mkl : zsymm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Strmm #################################################### | |||
| strmm.goto : strmm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| strmm.acml : strmm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| strmm.atlas : strmm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| strmm.mkl : strmm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Dtrmm #################################################### | |||
| dtrmm.goto : dtrmm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| dtrmm.acml : dtrmm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dtrmm.atlas : dtrmm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dtrmm.mkl : dtrmm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Ctrmm #################################################### | |||
| ctrmm.goto : ctrmm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| ctrmm.acml : ctrmm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| slinpack.flame : slinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ctrmm.atlas : ctrmm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dlinpack.flame : dlinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ctrmm.mkl : ctrmm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| clinpack.flame : clinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Ztrmm #################################################### | |||
| zlinpack.flame : zlinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ztrmm.goto : ztrmm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| scholesky.flame : scholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ztrmm.acml : ztrmm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dcholesky.flame : dcholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ztrmm.atlas : ztrmm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ccholesky.flame : ccholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ztrmm.mkl : ztrmm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zcholesky.flame : zcholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| slinpack.sun : slinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Strsm #################################################### | |||
| strsm.goto : strsm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| dlinpack.sun : dlinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| strsm.acml : strsm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| clinpack.sun : clinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| strsm.atlas : strsm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zlinpack.sun : zlinpack.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| strsm.mkl : strsm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| scholesky.sun : scholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Dtrsm #################################################### | |||
| dtrsm.goto : dtrsm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| dcholesky.sun : dcholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dtrsm.acml : dtrsm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ccholesky.sun : ccholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dtrsm.atlas : dtrsm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zcholesky.sun : zcholesky.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dtrsm.mkl : dtrsm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| slinpack.cula : slinpack.$(SUFFIX) cula_wrapper.$(SUFFIX) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CULA_LIB) ../$(LIBNAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Ctrsm #################################################### | |||
| clinpack.cula : clinpack.$(SUFFIX) cula_wrapper.$(SUFFIX) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CULA_LIB) ../$(LIBNAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ctrsm.goto : ctrsm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| ctrsm.acml : ctrsm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| cula_wrapper.$(SUFFIX) : cula_wrapper.c | |||
| $(CC) $(CFLAGS) -c $(CULA_INC) -o $(@F) $^ | |||
| ctrsm.atlas : ctrsm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ctrsm.mkl : ctrsm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Ztrsm #################################################### | |||
| ztrsm.goto : ztrsm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| ztrsm.acml : ztrsm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ztrsm.atlas : ztrsm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ztrsm.mkl : ztrsm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Ssyrk #################################################### | |||
| ssyrk.goto : ssyrk.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| ssyrk.acml : ssyrk.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ssyrk.atlas : ssyrk.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ssyrk.mkl : ssyrk.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Dsyrk #################################################### | |||
| dsyrk.goto : dsyrk.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| dsyrk.acml : dsyrk.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dsyrk.atlas : dsyrk.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dsyrk.mkl : dsyrk.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Csyrk #################################################### | |||
| csyrk.goto : csyrk.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| csyrk.acml : csyrk.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| csyrk.atlas : csyrk.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| csyrk.mkl : csyrk.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Zsyrk #################################################### | |||
| zsyrk.goto : zsyrk.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| zsyrk.acml : zsyrk.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zsyrk.atlas : zsyrk.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zsyrk.mkl : zsyrk.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Ssyr2k #################################################### | |||
| ssyr2k.goto : ssyr2k.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| ssyr2k.acml : ssyr2k.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ssyr2k.atlas : ssyr2k.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ssyr2k.mkl : ssyr2k.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Dsyr2k #################################################### | |||
| dsyr2k.goto : dsyr2k.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| dsyr2k.acml : dsyr2k.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dsyr2k.atlas : dsyr2k.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dsyr2k.mkl : dsyr2k.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Csyr2k #################################################### | |||
| csyr2k.goto : csyr2k.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| csyr2k.acml : csyr2k.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| csyr2k.atlas : csyr2k.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| csyr2k.mkl : csyr2k.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Zsyr2k #################################################### | |||
| zsyr2k.goto : zsyr2k.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| zsyr2k.acml : zsyr2k.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zsyr2k.atlas : zsyr2k.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zsyr2k.mkl : zsyr2k.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Chemm #################################################### | |||
| chemm.goto : chemm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| chemm.acml : chemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| chemm.atlas : chemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| chemm.mkl : chemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Zhemm #################################################### | |||
| zhemm.goto : zhemm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||
| zhemm.acml : zhemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zhemm.atlas : zhemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zhemm.mkl : zhemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
##################################### Cherk ####################################################
# cherk benchmark executables, one per BLAS backend.
# The .goto binary links against ../$(LIBNAME); the remaining links carry a
# leading '-' so that a failure there does not stop make.
cherk.goto : cherk.$(SUFFIX) ../$(LIBNAME)
	$(CC) $(CFLAGS) -o $@ $^ $(CEXTRALIB) $(EXTRALIB) -lm

cherk.acml : cherk.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $@ $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

cherk.atlas : cherk.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $@ $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

cherk.mkl : cherk.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $@ $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Zherk ####################################################
# zherk benchmark executables, one per BLAS backend.
# The .goto binary links against ../$(LIBNAME); the remaining links carry a
# leading '-' so that a failure there does not stop make.
zherk.goto : zherk.$(SUFFIX) ../$(LIBNAME)
	$(CC) $(CFLAGS) -o $@ $^ $(CEXTRALIB) $(EXTRALIB) -lm

zherk.acml : zherk.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $@ $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zherk.atlas : zherk.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $@ $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zherk.mkl : zherk.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $@ $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Cher2k ####################################################
# cher2k benchmark executables, one per BLAS backend.
# The .goto binary links against ../$(LIBNAME); the remaining links carry a
# leading '-' so that a failure there does not stop make.
cher2k.goto : cher2k.$(SUFFIX) ../$(LIBNAME)
	$(CC) $(CFLAGS) -o $@ $^ $(CEXTRALIB) $(EXTRALIB) -lm

cher2k.acml : cher2k.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $@ $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

cher2k.atlas : cher2k.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $@ $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

cher2k.mkl : cher2k.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $@ $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Zher2k ####################################################
# zher2k benchmark executables, one per BLAS backend.
# The .goto binary links against ../$(LIBNAME); the remaining links carry a
# leading '-' so that a failure there does not stop make.
zher2k.goto : zher2k.$(SUFFIX) ../$(LIBNAME)
	$(CC) $(CFLAGS) -o $@ $^ $(CEXTRALIB) $(EXTRALIB) -lm

zher2k.acml : zher2k.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $@ $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zher2k.atlas : zher2k.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $@ $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zher2k.mkl : zher2k.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $@ $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

| ################################################################################################### | |||
# linpack.c compiled with both COMPLEX and DOUBLE explicitly undefined.
slinpack.$(SUFFIX) : linpack.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $@ $^

| @@ -159,37 +609,119 @@ slinpack.$(SUFFIX) : linpack.c | |||
# Object rules for the benchmark drivers.
# Each driver source is compiled several times; the leading letter of the
# object name selects the -D/-U combination of COMPLEX, DOUBLE and XDOUBLE
# passed on the command line:
#   s: -UCOMPLEX -UDOUBLE     d: -UCOMPLEX -DDOUBLE     q: -UCOMPLEX -DXDOUBLE
#   c: -DCOMPLEX -UDOUBLE     z: -DCOMPLEX -DDOUBLE     x: -DCOMPLEX -DXDOUBLE

# ---- linpack.c ----
dlinpack.$(SUFFIX) : linpack.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $@ $^

qlinpack.$(SUFFIX) : linpack.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -DXDOUBLE -o $@ $^

clinpack.$(SUFFIX) : linpack.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $@ $^

zlinpack.$(SUFFIX) : linpack.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $@ $^

xlinpack.$(SUFFIX) : linpack.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DXDOUBLE -o $@ $^

# ---- cholesky.c ----
scholesky.$(SUFFIX) : cholesky.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $@ $^

dcholesky.$(SUFFIX) : cholesky.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $@ $^

qcholesky.$(SUFFIX) : cholesky.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -DXDOUBLE -o $@ $^

ccholesky.$(SUFFIX) : cholesky.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $@ $^

zcholesky.$(SUFFIX) : cholesky.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $@ $^

xcholesky.$(SUFFIX) : cholesky.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DXDOUBLE -o $@ $^

# ---- gemm.c ----
sgemm.$(SUFFIX) : gemm.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $@ $^

dgemm.$(SUFFIX) : gemm.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $@ $^

cgemm.$(SUFFIX) : gemm.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $@ $^

zgemm.$(SUFFIX) : gemm.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $@ $^

# ---- symm.c ----
ssymm.$(SUFFIX) : symm.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $@ $^

dsymm.$(SUFFIX) : symm.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $@ $^

csymm.$(SUFFIX) : symm.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $@ $^

zsymm.$(SUFFIX) : symm.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $@ $^

# ---- trmm.c ----
strmm.$(SUFFIX) : trmm.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $@ $^

dtrmm.$(SUFFIX) : trmm.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $@ $^

ctrmm.$(SUFFIX) : trmm.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $@ $^

ztrmm.$(SUFFIX) : trmm.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $@ $^

# ---- trsm.c ----
strsm.$(SUFFIX) : trsm.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $@ $^

dtrsm.$(SUFFIX) : trsm.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $@ $^

ctrsm.$(SUFFIX) : trsm.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $@ $^

ztrsm.$(SUFFIX) : trsm.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $@ $^

# ---- syrk.c ----
ssyrk.$(SUFFIX) : syrk.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $@ $^

dsyrk.$(SUFFIX) : syrk.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $@ $^

csyrk.$(SUFFIX) : syrk.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $@ $^

zsyrk.$(SUFFIX) : syrk.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $@ $^

# ---- syr2k.c ----
ssyr2k.$(SUFFIX) : syr2k.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $@ $^

dsyr2k.$(SUFFIX) : syr2k.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $@ $^

csyr2k.$(SUFFIX) : syr2k.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $@ $^

zsyr2k.$(SUFFIX) : syr2k.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $@ $^

# ---- hemm.c (complex builds only) ----
chemm.$(SUFFIX) : hemm.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $@ $^

zhemm.$(SUFFIX) : hemm.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $@ $^

# ---- herk.c (complex builds only) ----
cherk.$(SUFFIX) : herk.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $@ $^

zherk.$(SUFFIX) : herk.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $@ $^

# ---- her2k.c (complex builds only) ----
cher2k.$(SUFFIX) : her2k.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $@ $^

zher2k.$(SUFFIX) : her2k.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $@ $^

# Delete benchmark executables by backend suffix; a single rm covers every
# suffix the two former rm lines listed between them.
clean ::
	@rm -f *.goto *.mkl *.acml *.atlas *.sun *.cula

| include $(TOPDIR)/Makefile.tail | |||
| @@ -0,0 +1,210 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #ifdef __CYGWIN32__ | |||
| #include <sys/time.h> | |||
| #endif | |||
| #include "common.h" | |||
/* Bind GEMM to the BLAS entry point selected by the COMPLEX / DOUBLE build flags. */
#undef GEMM

#if defined(COMPLEX) && defined(DOUBLE)
#define GEMM BLASFUNC(zgemm)
#elif defined(COMPLEX)
#define GEMM BLASFUNC(cgemm)
#elif defined(DOUBLE)
#define GEMM BLASFUNC(dgemm)
#else
#define GEMM BLASFUNC(sgemm)
#endif

#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted to rebase FILETIME onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives seconds / microseconds since the Unix epoch. May be NULL,
 *      in which case nothing is written.
 * tz : ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;   /* time-zone output is not supported */

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Join the two 32-bit halves into one 64-bit tick count. */
  ticks = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  ticks /= 10;                        /* convert into microseconds */
  ticks -= DELTA_EPOCH_IN_MICROSECS;  /* file time -> unix epoch   */

  tv->tv_sec  = (long)(ticks / 1000000UL);
  tv->tv_usec = (long)(ticks % 1000000UL);

  return 0;
}

#endif

/* NOTE: the trailing "&& 0" keeps this whole section compiled out. */
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * malloc() stand-in that obtains memory from a SysV shared-memory segment
 * created with SHM_HUGETLB.
 *
 * The request is (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1);
 * presumably HUGE_PAGESIZE is a power of two, so this yields a page multiple
 * (an already aligned size gains one extra page).
 *
 * Prints a message and exits with status 1 if shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){

  void *mem;
  int   seg;

  seg = shmget(IPC_PRIVATE,
               (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
               SHM_HUGETLB | IPC_CREAT |0600);

  if (seg < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(seg, NULL, SHM_RND);

  if ((BLASLONG)mem == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching; the mapping itself is returned. */
  shmctl(seg, IPC_RMID, 0);

  return mem;
}

/* Route every later malloc() call in this file through huge_malloc(). */
#define malloc huge_malloc

#endif

| int MAIN__(int argc, char *argv[]){ | |||
| FLOAT *a, *b, *c; | |||
| FLOAT alpha[] = {1.0, 1.0}; | |||
| FLOAT beta [] = {1.0, 1.0}; | |||
| char trans='N'; | |||
| blasint m, i, j; | |||
| int loops = 1; | |||
| int l; | |||
| char *p; | |||
| int from = 1; | |||
| int to = 200; | |||
| int step = 1; | |||
| struct timeval start, stop; | |||
| double time1,timeg; | |||
| argc--;argv++; | |||
| if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
| if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
| if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
| fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step); | |||
| if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
| } | |||
| if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
| } | |||
| if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
| } | |||
| p = getenv("OPENBLAS_LOOPS"); | |||
| if ( p != NULL ) | |||
| loops = atoi(p); | |||
| #ifdef linux | |||
| srandom(getpid()); | |||
| #endif | |||
| fprintf(stderr, " SIZE Flops\n"); | |||
| for(m = from; m <= to; m += step) | |||
| { | |||
| timeg=0; | |||
| fprintf(stderr, " %6d : ", (int)m); | |||
| for (l=0; l<loops; l++) | |||
| { | |||
| for(j = 0; j < m; j++){ | |||
| for(i = 0; i < m * COMPSIZE; i++){ | |||
| a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| } | |||
| } | |||
| gettimeofday( &start, (struct timezone *)0); | |||
| GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | |||
| gettimeofday( &stop, (struct timezone *)0); | |||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
| timeg += time1; | |||
| } | |||
| timeg /= loops; | |||
| fprintf(stderr, | |||
| " %10.2f MFlops\n", | |||
| COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6); | |||
| } | |||
| return 0; | |||
| } | |||
| void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||
| @@ -0,0 +1,192 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #ifdef __CYGWIN32__ | |||
| #include <sys/time.h> | |||
| #endif | |||
| #include "common.h" | |||
/* Bind HEMM to the double- or single-precision complex entry point. */
#undef HEMM

#ifndef DOUBLE
#define HEMM BLASFUNC(chemm)
#else
#define HEMM BLASFUNC(zhemm)
#endif

#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted to rebase FILETIME onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives seconds / microseconds since the Unix epoch. May be NULL,
 *      in which case nothing is written.
 * tz : ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;   /* time-zone output is not supported */

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Join the two 32-bit halves into one 64-bit tick count. */
  ticks = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  ticks /= 10;                        /* convert into microseconds */
  ticks -= DELTA_EPOCH_IN_MICROSECS;  /* file time -> unix epoch   */

  tv->tv_sec  = (long)(ticks / 1000000UL);
  tv->tv_usec = (long)(ticks % 1000000UL);

  return 0;
}

#endif

/* NOTE: the trailing "&& 0" keeps this whole section compiled out. */
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * malloc() stand-in that obtains memory from a SysV shared-memory segment
 * created with SHM_HUGETLB.
 *
 * The request is (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1);
 * presumably HUGE_PAGESIZE is a power of two, so this yields a page multiple
 * (an already aligned size gains one extra page).
 *
 * Prints a message and exits with status 1 if shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){

  void *mem;
  int   seg;

  seg = shmget(IPC_PRIVATE,
               (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
               SHM_HUGETLB | IPC_CREAT |0600);

  if (seg < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(seg, NULL, SHM_RND);

  if ((BLASLONG)mem == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching; the mapping itself is returned. */
  shmctl(seg, IPC_RMID, 0);

  return mem;
}

/* Route every later malloc() call in this file through huge_malloc(). */
#define malloc huge_malloc

#endif

| int MAIN__(int argc, char *argv[]){ | |||
| FLOAT *a, *b, *c; | |||
| FLOAT alpha[] = {1.0, 1.0}; | |||
| FLOAT beta [] = {1.0, 1.0}; | |||
| char *p; | |||
| char side='L'; | |||
| char uplo='U'; | |||
| if ((p = getenv("OPENBLAS_SIDE"))) side=*p; | |||
| if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
| blasint m, i, j; | |||
| int from = 1; | |||
| int to = 200; | |||
| int step = 1; | |||
| struct timeval start, stop; | |||
| double time1; | |||
| argc--;argv++; | |||
| if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
| if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
| if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
| fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c\n", from, to, step,side,uplo); | |||
| if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
| } | |||
| if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
| } | |||
| if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
| } | |||
| #ifdef linux | |||
| srandom(getpid()); | |||
| #endif | |||
| fprintf(stderr, " SIZE Flops\n"); | |||
| for(m = from; m <= to; m += step) | |||
| { | |||
| fprintf(stderr, " %6d : ", (int)m); | |||
| for(j = 0; j < m; j++){ | |||
| for(i = 0; i < m * COMPSIZE; i++){ | |||
| a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| } | |||
| } | |||
| gettimeofday( &start, (struct timezone *)0); | |||
| HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | |||
| gettimeofday( &stop, (struct timezone *)0); | |||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
| gettimeofday( &start, (struct timezone *)0); | |||
| fprintf(stderr, | |||
| " %10.2f MFlops\n", | |||
| COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||
| } | |||
| return 0; | |||
| } | |||
| void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||
| @@ -0,0 +1,191 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #ifdef __CYGWIN32__ | |||
| #include <sys/time.h> | |||
| #endif | |||
| #include "common.h" | |||
/* Bind HER2K to the double- or single-precision complex entry point. */
#undef HER2K

#ifndef DOUBLE
#define HER2K BLASFUNC(cher2k)
#else
#define HER2K BLASFUNC(zher2k)
#endif

#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted to rebase FILETIME onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives seconds / microseconds since the Unix epoch. May be NULL,
 *      in which case nothing is written.
 * tz : ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;   /* time-zone output is not supported */

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Join the two 32-bit halves into one 64-bit tick count. */
  ticks = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  ticks /= 10;                        /* convert into microseconds */
  ticks -= DELTA_EPOCH_IN_MICROSECS;  /* file time -> unix epoch   */

  tv->tv_sec  = (long)(ticks / 1000000UL);
  tv->tv_usec = (long)(ticks % 1000000UL);

  return 0;
}

#endif

/* NOTE: the trailing "&& 0" keeps this whole section compiled out. */
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * malloc() stand-in that obtains memory from a SysV shared-memory segment
 * created with SHM_HUGETLB.
 *
 * The request is (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1);
 * presumably HUGE_PAGESIZE is a power of two, so this yields a page multiple
 * (an already aligned size gains one extra page).
 *
 * Prints a message and exits with status 1 if shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){

  void *mem;
  int   seg;

  seg = shmget(IPC_PRIVATE,
               (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
               SHM_HUGETLB | IPC_CREAT |0600);

  if (seg < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(seg, NULL, SHM_RND);

  if ((BLASLONG)mem == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching; the mapping itself is returned. */
  shmctl(seg, IPC_RMID, 0);

  return mem;
}

/* Route every later malloc() call in this file through huge_malloc(). */
#define malloc huge_malloc

#endif

| int MAIN__(int argc, char *argv[]){ | |||
| FLOAT *a, *b, *c; | |||
| FLOAT alpha[] = {1.0, 1.0}; | |||
| FLOAT beta [] = {1.0, 1.0}; | |||
| char *p; | |||
| char uplo='U'; | |||
| char trans='N'; | |||
| if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
| if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||
| blasint m, i, j; | |||
| int from = 1; | |||
| int to = 200; | |||
| int step = 1; | |||
| struct timeval start, stop; | |||
| double time1; | |||
| argc--;argv++; | |||
| if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
| if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
| if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
| fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans); | |||
| if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
| } | |||
| if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
| } | |||
| if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
| } | |||
| #ifdef linux | |||
| srandom(getpid()); | |||
| #endif | |||
| fprintf(stderr, " SIZE Flops\n"); | |||
| for(m = from; m <= to; m += step) | |||
| { | |||
| fprintf(stderr, " %6d : ", (int)m); | |||
| for(j = 0; j < m; j++){ | |||
| for(i = 0; i < m * COMPSIZE; i++){ | |||
| a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| } | |||
| } | |||
| gettimeofday( &start, (struct timezone *)0); | |||
| HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | |||
| gettimeofday( &stop, (struct timezone *)0); | |||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
| gettimeofday( &start, (struct timezone *)0); | |||
| fprintf(stderr, | |||
| " %10.2f MFlops\n", | |||
| COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||
| } | |||
| return 0; | |||
| } | |||
| void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||
| @@ -0,0 +1,189 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #ifdef __CYGWIN32__ | |||
| #include <sys/time.h> | |||
| #endif | |||
| #include "common.h" | |||
/* Bind HERK to the double- or single-precision complex entry point. */
#undef HERK

#ifndef DOUBLE
#define HERK BLASFUNC(cherk)
#else
#define HERK BLASFUNC(zherk)
#endif

#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted to rebase FILETIME onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives seconds / microseconds since the Unix epoch. May be NULL,
 *      in which case nothing is written.
 * tz : ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;   /* time-zone output is not supported */

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Join the two 32-bit halves into one 64-bit tick count. */
  ticks = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  ticks /= 10;                        /* convert into microseconds */
  ticks -= DELTA_EPOCH_IN_MICROSECS;  /* file time -> unix epoch   */

  tv->tv_sec  = (long)(ticks / 1000000UL);
  tv->tv_usec = (long)(ticks % 1000000UL);

  return 0;
}

#endif

/* NOTE: the trailing "&& 0" keeps this whole section compiled out. */
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * malloc() stand-in that obtains memory from a SysV shared-memory segment
 * created with SHM_HUGETLB.
 *
 * The request is (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1);
 * presumably HUGE_PAGESIZE is a power of two, so this yields a page multiple
 * (an already aligned size gains one extra page).
 *
 * Prints a message and exits with status 1 if shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){

  void *mem;
  int   seg;

  seg = shmget(IPC_PRIVATE,
               (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
               SHM_HUGETLB | IPC_CREAT |0600);

  if (seg < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(seg, NULL, SHM_RND);

  if ((BLASLONG)mem == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching; the mapping itself is returned. */
  shmctl(seg, IPC_RMID, 0);

  return mem;
}

/* Route every later malloc() call in this file through huge_malloc(). */
#define malloc huge_malloc

#endif

| int MAIN__(int argc, char *argv[]){ | |||
| FLOAT *a, *c; | |||
| FLOAT alpha[] = {1.0, 1.0}; | |||
| FLOAT beta [] = {1.0, 1.0}; | |||
| char *p; | |||
| char uplo='U'; | |||
| char trans='N'; | |||
| if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
| if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||
| blasint m, i, j; | |||
| int from = 1; | |||
| int to = 200; | |||
| int step = 1; | |||
| struct timeval start, stop; | |||
| double time1; | |||
| argc--;argv++; | |||
| if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
| if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
| if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
| fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans); | |||
| if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
| } | |||
| if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
| } | |||
| #ifdef linux | |||
| srandom(getpid()); | |||
| #endif | |||
| fprintf(stderr, " SIZE Flops\n"); | |||
| for(m = from; m <= to; m += step) | |||
| { | |||
| fprintf(stderr, " %6d : ", (int)m); | |||
| for(j = 0; j < m; j++){ | |||
| for(i = 0; i < m * COMPSIZE; i++){ | |||
| a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| } | |||
| } | |||
| gettimeofday( &start, (struct timezone *)0); | |||
| HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m ); | |||
| gettimeofday( &stop, (struct timezone *)0); | |||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
| gettimeofday( &start, (struct timezone *)0); | |||
| fprintf(stderr, | |||
| " %10.2f MFlops\n", | |||
| COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||
| } | |||
| return 0; | |||
| } | |||
| void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||
| @@ -0,0 +1,203 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #ifdef __CYGWIN32__ | |||
| #include <sys/time.h> | |||
| #endif | |||
| #include "common.h" | |||
| /* Bind SYMM to the BLAS entry point for this build: the s/d prefix follows | |||
|    DOUBLE and the c/z prefix is used instead when COMPLEX is defined. */ | |||
| #undef SYMM | |||
| #if defined(COMPLEX) && defined(DOUBLE) | |||
| #define SYMM BLASFUNC(zsymm) | |||
| #elif defined(COMPLEX) | |||
| #define SYMM BLASFUNC(csymm) | |||
| #elif defined(DOUBLE) | |||
| #define SYMM BLASFUNC(dsymm) | |||
| #else | |||
| #define SYMM BLASFUNC(ssymm) | |||
| #endif | |||
| #if defined(__WIN32__) || defined(__WIN64__) | |||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||
| #endif | |||
| /* | |||
|  * Windows-only stand-in for gettimeofday(). | |||
|  * Reads the current system FILETIME, converts it to microseconds, | |||
|  * subtracts DELTA_EPOCH_IN_MICROSECS and splits the result into | |||
|  * tv->tv_sec and tv->tv_usec. | |||
|  *   tv : destination; nothing is written when it is NULL. | |||
|  *   tz : ignored. | |||
|  * Always returns 0. | |||
|  */ | |||
| int gettimeofday(struct timeval *tv, void *tz){ | |||
|   FILETIME ft; | |||
|   unsigned __int64 tmpres; | |||
|   (void)tz; /* the time zone argument is never consulted */ | |||
|   if (NULL != tv) | |||
|   { | |||
|     GetSystemTimeAsFileTime(&ft); | |||
|     /* assemble the 64-bit tick count from its two 32-bit halves */ | |||
|     tmpres = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime; | |||
|     /*converting file time to unix epoch*/ | |||
|     tmpres /= 10; /*convert into microseconds*/ | |||
|     tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||
|     tv->tv_sec = (long)(tmpres / 1000000UL); | |||
|     tv->tv_usec = (long)(tmpres % 1000000UL); | |||
|   } | |||
|   return 0; | |||
| } | |||
| #endif | |||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||
| /* Fallback flag value for headers that do not provide SHM_HUGETLB. */ | |||
| #ifndef SHM_HUGETLB | |||
| #define SHM_HUGETLB 04000 | |||
| #endif | |||
| /* | |||
|  * malloc() replacement backed by a private SysV shared-memory segment | |||
|  * requested with SHM_HUGETLB. The segment is marked for removal right | |||
|  * after it has been attached. Prints a message and exits with status 1 | |||
|  * when shmget() or shmat() fails. | |||
|  * The trailing "&& 0" in the guard above currently compiles this out. | |||
|  * NOTE(review): the length passed to shmget adds a full HUGE_PAGESIZE | |||
|  * before masking, so an already aligned size grows by one page - confirm | |||
|  * whether that is intended. | |||
|  */ | |||
| static void *huge_malloc(BLASLONG size){ | |||
|   void *mem; | |||
|   int seg_id; | |||
|   seg_id = shmget(IPC_PRIVATE, | |||
|                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||
|                   SHM_HUGETLB | IPC_CREAT |0600); | |||
|   if (seg_id < 0) { | |||
|     printf( "Memory allocation failed(shmget).\n"); | |||
|     exit(1); | |||
|   } | |||
|   mem = shmat(seg_id, NULL, SHM_RND); | |||
|   if ((BLASLONG)mem == -1){ | |||
|     printf( "Memory allocation failed(shmat).\n"); | |||
|     exit(1); | |||
|   } | |||
|   shmctl(seg_id, IPC_RMID, 0); | |||
|   return mem; | |||
| } | |||
| #define malloc huge_malloc | |||
| #endif | |||
| /* | |||
|  * SYMM benchmark driver. | |||
|  * Command line: [from [to [step]]] - matrix orders to time (defaults 1, 200, 1). | |||
|  * Environment:  OPENBLAS_SIDE and OPENBLAS_UPLO override the first character | |||
|  *               passed as side ('L') and uplo ('U'). | |||
|  * For every order m the three m x m operands are refilled with values in | |||
|  * [-0.5, 0.5], one SYMM call is timed with gettimeofday() and the rate is | |||
|  * printed to stderr. | |||
|  * Returns 0; exits with status 1 on allocation failure or a non-positive step. | |||
|  */ | |||
| int MAIN__(int argc, char *argv[]){ | |||
|   FLOAT *a, *b, *c; | |||
|   FLOAT alpha[] = {1.0, 1.0}; | |||
|   FLOAT beta [] = {1.0, 1.0}; | |||
|   char *p; | |||
|   char side='L'; | |||
|   char uplo='U'; | |||
|   blasint m, i, j; | |||
|   int from = 1; | |||
|   int to = 200; | |||
|   int step = 1; | |||
|   struct timeval start, stop; | |||
|   double time1; | |||
|   if ((p = getenv("OPENBLAS_SIDE"))) side=*p; | |||
|   if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
|   argc--;argv++; | |||
|   if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
|   if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
|   if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
|   /* with step <= 0 the size loop below would never move past `to` */ | |||
|   if (step < 1) { | |||
|     fprintf(stderr,"Step must be a positive integer.\n");exit(1); | |||
|   } | |||
|   fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c\n", from, to, step,side,uplo); | |||
|   /* buffers are sized once for the largest order and reused for every m */ | |||
|   if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
|   } | |||
|   if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
|   } | |||
|   if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
|   } | |||
| #ifdef linux | |||
|   srandom(getpid()); | |||
| #endif | |||
|   fprintf(stderr, " SIZE Flops\n"); | |||
|   for(m = from; m <= to; m += step) | |||
|   { | |||
|     fprintf(stderr, " %6d : ", (int)m); | |||
|     for(j = 0; j < m; j++){ | |||
|       for(i = 0; i < m * COMPSIZE; i++){ | |||
|         a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
|         b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
|         c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
|       } | |||
|     } | |||
|     gettimeofday( &start, (struct timezone *)0); | |||
|     SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | |||
|     gettimeofday( &stop, (struct timezone *)0); | |||
|     /* elapsed seconds of the single call above */ | |||
|     time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
|     fprintf(stderr, | |||
|             " %10.2f MFlops\n", | |||
|             COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||
|   } | |||
|   return 0; | |||
| } | |||
| /* weak alias so the driver also links as a plain C program; MAIN__ returns int */ | |||
| int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||
| @@ -0,0 +1,203 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #ifdef __CYGWIN32__ | |||
| #include <sys/time.h> | |||
| #endif | |||
| #include "common.h" | |||
| /* Bind SYR2K to the BLAS entry point for this build: the s/d prefix follows | |||
|    DOUBLE and the c/z prefix is used instead when COMPLEX is defined. */ | |||
| #undef SYR2K | |||
| #if defined(COMPLEX) && defined(DOUBLE) | |||
| #define SYR2K BLASFUNC(zsyr2k) | |||
| #elif defined(COMPLEX) | |||
| #define SYR2K BLASFUNC(csyr2k) | |||
| #elif defined(DOUBLE) | |||
| #define SYR2K BLASFUNC(dsyr2k) | |||
| #else | |||
| #define SYR2K BLASFUNC(ssyr2k) | |||
| #endif | |||
| #if defined(__WIN32__) || defined(__WIN64__) | |||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||
| #endif | |||
| /* | |||
|  * Windows-only stand-in for gettimeofday(). | |||
|  * Reads the current system FILETIME, converts it to microseconds, | |||
|  * subtracts DELTA_EPOCH_IN_MICROSECS and splits the result into | |||
|  * tv->tv_sec and tv->tv_usec. | |||
|  *   tv : destination; nothing is written when it is NULL. | |||
|  *   tz : ignored. | |||
|  * Always returns 0. | |||
|  */ | |||
| int gettimeofday(struct timeval *tv, void *tz){ | |||
|   FILETIME ft; | |||
|   unsigned __int64 tmpres; | |||
|   (void)tz; /* the time zone argument is never consulted */ | |||
|   if (NULL != tv) | |||
|   { | |||
|     GetSystemTimeAsFileTime(&ft); | |||
|     /* assemble the 64-bit tick count from its two 32-bit halves */ | |||
|     tmpres = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime; | |||
|     /*converting file time to unix epoch*/ | |||
|     tmpres /= 10; /*convert into microseconds*/ | |||
|     tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||
|     tv->tv_sec = (long)(tmpres / 1000000UL); | |||
|     tv->tv_usec = (long)(tmpres % 1000000UL); | |||
|   } | |||
|   return 0; | |||
| } | |||
| #endif | |||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||
| /* Fallback flag value for headers that do not provide SHM_HUGETLB. */ | |||
| #ifndef SHM_HUGETLB | |||
| #define SHM_HUGETLB 04000 | |||
| #endif | |||
| /* | |||
|  * malloc() replacement backed by a private SysV shared-memory segment | |||
|  * requested with SHM_HUGETLB. The segment is marked for removal right | |||
|  * after it has been attached. Prints a message and exits with status 1 | |||
|  * when shmget() or shmat() fails. | |||
|  * The trailing "&& 0" in the guard above currently compiles this out. | |||
|  * NOTE(review): the length passed to shmget adds a full HUGE_PAGESIZE | |||
|  * before masking, so an already aligned size grows by one page - confirm | |||
|  * whether that is intended. | |||
|  */ | |||
| static void *huge_malloc(BLASLONG size){ | |||
|   void *mem; | |||
|   int seg_id; | |||
|   seg_id = shmget(IPC_PRIVATE, | |||
|                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||
|                   SHM_HUGETLB | IPC_CREAT |0600); | |||
|   if (seg_id < 0) { | |||
|     printf( "Memory allocation failed(shmget).\n"); | |||
|     exit(1); | |||
|   } | |||
|   mem = shmat(seg_id, NULL, SHM_RND); | |||
|   if ((BLASLONG)mem == -1){ | |||
|     printf( "Memory allocation failed(shmat).\n"); | |||
|     exit(1); | |||
|   } | |||
|   shmctl(seg_id, IPC_RMID, 0); | |||
|   return mem; | |||
| } | |||
| #define malloc huge_malloc | |||
| #endif | |||
| /* | |||
|  * SYR2K benchmark driver. | |||
|  * Command line: [from [to [step]]] - matrix orders to time (defaults 1, 200, 1). | |||
|  * Environment:  OPENBLAS_UPLO and OPENBLAS_TRANS override the first character | |||
|  *               passed as uplo ('U') and trans ('N'). | |||
|  * For every order m the three m x m operands are refilled with values in | |||
|  * [-0.5, 0.5], one SYR2K call is timed with gettimeofday() and the rate is | |||
|  * printed to stderr. | |||
|  * Returns 0; exits with status 1 on allocation failure or a non-positive step. | |||
|  */ | |||
| int MAIN__(int argc, char *argv[]){ | |||
|   FLOAT *a, *b, *c; | |||
|   FLOAT alpha[] = {1.0, 1.0}; | |||
|   FLOAT beta [] = {1.0, 1.0}; | |||
|   char *p; | |||
|   char uplo='U'; | |||
|   char trans='N'; | |||
|   blasint m, i, j; | |||
|   int from = 1; | |||
|   int to = 200; | |||
|   int step = 1; | |||
|   struct timeval start, stop; | |||
|   double time1; | |||
|   if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
|   if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||
|   argc--;argv++; | |||
|   if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
|   if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
|   if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
|   /* with step <= 0 the size loop below would never move past `to` */ | |||
|   if (step < 1) { | |||
|     fprintf(stderr,"Step must be a positive integer.\n");exit(1); | |||
|   } | |||
|   fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans); | |||
|   /* buffers are sized once for the largest order and reused for every m */ | |||
|   if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
|   } | |||
|   if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
|   } | |||
|   if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
|   } | |||
| #ifdef linux | |||
|   srandom(getpid()); | |||
| #endif | |||
|   fprintf(stderr, " SIZE Flops\n"); | |||
|   for(m = from; m <= to; m += step) | |||
|   { | |||
|     fprintf(stderr, " %6d : ", (int)m); | |||
|     for(j = 0; j < m; j++){ | |||
|       for(i = 0; i < m * COMPSIZE; i++){ | |||
|         a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
|         b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
|         c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
|       } | |||
|     } | |||
|     gettimeofday( &start, (struct timezone *)0); | |||
|     SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | |||
|     gettimeofday( &stop, (struct timezone *)0); | |||
|     /* elapsed seconds of the single call above */ | |||
|     time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
|     fprintf(stderr, | |||
|             " %10.2f MFlops\n", | |||
|             COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||
|   } | |||
|   return 0; | |||
| } | |||
| /* weak alias so the driver also links as a plain C program; MAIN__ returns int */ | |||
| int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||
| @@ -0,0 +1,199 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #ifdef __CYGWIN32__ | |||
| #include <sys/time.h> | |||
| #endif | |||
| #include "common.h" | |||
| /* Bind SYRK to the BLAS entry point for this build: the s/d prefix follows | |||
|    DOUBLE and the c/z prefix is used instead when COMPLEX is defined. */ | |||
| #undef SYRK | |||
| #if defined(COMPLEX) && defined(DOUBLE) | |||
| #define SYRK BLASFUNC(zsyrk) | |||
| #elif defined(COMPLEX) | |||
| #define SYRK BLASFUNC(csyrk) | |||
| #elif defined(DOUBLE) | |||
| #define SYRK BLASFUNC(dsyrk) | |||
| #else | |||
| #define SYRK BLASFUNC(ssyrk) | |||
| #endif | |||
| #if defined(__WIN32__) || defined(__WIN64__) | |||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||
| #endif | |||
| /* | |||
|  * Windows-only stand-in for gettimeofday(). | |||
|  * Reads the current system FILETIME, converts it to microseconds, | |||
|  * subtracts DELTA_EPOCH_IN_MICROSECS and splits the result into | |||
|  * tv->tv_sec and tv->tv_usec. | |||
|  *   tv : destination; nothing is written when it is NULL. | |||
|  *   tz : ignored. | |||
|  * Always returns 0. | |||
|  */ | |||
| int gettimeofday(struct timeval *tv, void *tz){ | |||
|   FILETIME ft; | |||
|   unsigned __int64 tmpres; | |||
|   (void)tz; /* the time zone argument is never consulted */ | |||
|   if (NULL != tv) | |||
|   { | |||
|     GetSystemTimeAsFileTime(&ft); | |||
|     /* assemble the 64-bit tick count from its two 32-bit halves */ | |||
|     tmpres = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime; | |||
|     /*converting file time to unix epoch*/ | |||
|     tmpres /= 10; /*convert into microseconds*/ | |||
|     tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||
|     tv->tv_sec = (long)(tmpres / 1000000UL); | |||
|     tv->tv_usec = (long)(tmpres % 1000000UL); | |||
|   } | |||
|   return 0; | |||
| } | |||
| #endif | |||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||
| /* Fallback flag value for headers that do not provide SHM_HUGETLB. */ | |||
| #ifndef SHM_HUGETLB | |||
| #define SHM_HUGETLB 04000 | |||
| #endif | |||
| /* | |||
|  * malloc() replacement backed by a private SysV shared-memory segment | |||
|  * requested with SHM_HUGETLB. The segment is marked for removal right | |||
|  * after it has been attached. Prints a message and exits with status 1 | |||
|  * when shmget() or shmat() fails. | |||
|  * The trailing "&& 0" in the guard above currently compiles this out. | |||
|  * NOTE(review): the length passed to shmget adds a full HUGE_PAGESIZE | |||
|  * before masking, so an already aligned size grows by one page - confirm | |||
|  * whether that is intended. | |||
|  */ | |||
| static void *huge_malloc(BLASLONG size){ | |||
|   void *mem; | |||
|   int seg_id; | |||
|   seg_id = shmget(IPC_PRIVATE, | |||
|                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||
|                   SHM_HUGETLB | IPC_CREAT |0600); | |||
|   if (seg_id < 0) { | |||
|     printf( "Memory allocation failed(shmget).\n"); | |||
|     exit(1); | |||
|   } | |||
|   mem = shmat(seg_id, NULL, SHM_RND); | |||
|   if ((BLASLONG)mem == -1){ | |||
|     printf( "Memory allocation failed(shmat).\n"); | |||
|     exit(1); | |||
|   } | |||
|   shmctl(seg_id, IPC_RMID, 0); | |||
|   return mem; | |||
| } | |||
| #define malloc huge_malloc | |||
| #endif | |||
| /* | |||
|  * SYRK benchmark driver. | |||
|  * Command line: [from [to [step]]] - matrix orders to time (defaults 1, 200, 1). | |||
|  * Environment:  OPENBLAS_UPLO and OPENBLAS_TRANS override the first character | |||
|  *               passed as uplo ('U') and trans ('N'). | |||
|  * For every order m the two m x m operands are refilled with values in | |||
|  * [-0.5, 0.5], one SYRK call is timed with gettimeofday() and the rate is | |||
|  * printed to stderr. | |||
|  * Returns 0; exits with status 1 on allocation failure or a non-positive step. | |||
|  */ | |||
| int MAIN__(int argc, char *argv[]){ | |||
|   FLOAT *a, *c; | |||
|   FLOAT alpha[] = {1.0, 1.0}; | |||
|   FLOAT beta [] = {1.0, 1.0}; | |||
|   char *p; | |||
|   char uplo='U'; | |||
|   char trans='N'; | |||
|   blasint m, i, j; | |||
|   int from = 1; | |||
|   int to = 200; | |||
|   int step = 1; | |||
|   struct timeval start, stop; | |||
|   double time1; | |||
|   if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
|   if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||
|   argc--;argv++; | |||
|   if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
|   if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
|   if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
|   /* with step <= 0 the size loop below would never move past `to` */ | |||
|   if (step < 1) { | |||
|     fprintf(stderr,"Step must be a positive integer.\n");exit(1); | |||
|   } | |||
|   fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans); | |||
|   /* buffers are sized once for the largest order and reused for every m */ | |||
|   if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
|   } | |||
|   if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
|   } | |||
| #ifdef linux | |||
|   srandom(getpid()); | |||
| #endif | |||
|   fprintf(stderr, " SIZE Flops\n"); | |||
|   for(m = from; m <= to; m += step) | |||
|   { | |||
|     fprintf(stderr, " %6d : ", (int)m); | |||
|     for(j = 0; j < m; j++){ | |||
|       for(i = 0; i < m * COMPSIZE; i++){ | |||
|         a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
|         c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
|       } | |||
|     } | |||
|     gettimeofday( &start, (struct timezone *)0); | |||
|     SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m ); | |||
|     gettimeofday( &stop, (struct timezone *)0); | |||
|     /* elapsed seconds of the single call above */ | |||
|     time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
|     fprintf(stderr, | |||
|             " %10.2f MFlops\n", | |||
|             COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||
|   } | |||
|   return 0; | |||
| } | |||
| /* weak alias so the driver also links as a plain C program; MAIN__ returns int */ | |||
| int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||
| @@ -0,0 +1,202 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #ifdef __CYGWIN32__ | |||
| #include <sys/time.h> | |||
| #endif | |||
| #include "common.h" | |||
| /* Bind TRMM to the BLAS entry point for this build: the s/d prefix follows | |||
|    DOUBLE and the c/z prefix is used instead when COMPLEX is defined. */ | |||
| #undef TRMM | |||
| #if defined(COMPLEX) && defined(DOUBLE) | |||
| #define TRMM BLASFUNC(ztrmm) | |||
| #elif defined(COMPLEX) | |||
| #define TRMM BLASFUNC(ctrmm) | |||
| #elif defined(DOUBLE) | |||
| #define TRMM BLASFUNC(dtrmm) | |||
| #else | |||
| #define TRMM BLASFUNC(strmm) | |||
| #endif | |||
| #if defined(__WIN32__) || defined(__WIN64__) | |||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||
| #endif | |||
| /* | |||
|  * Windows-only stand-in for gettimeofday(). | |||
|  * Reads the current system FILETIME, converts it to microseconds, | |||
|  * subtracts DELTA_EPOCH_IN_MICROSECS and splits the result into | |||
|  * tv->tv_sec and tv->tv_usec. | |||
|  *   tv : destination; nothing is written when it is NULL. | |||
|  *   tz : ignored. | |||
|  * Always returns 0. | |||
|  */ | |||
| int gettimeofday(struct timeval *tv, void *tz){ | |||
|   FILETIME ft; | |||
|   unsigned __int64 tmpres; | |||
|   (void)tz; /* the time zone argument is never consulted */ | |||
|   if (NULL != tv) | |||
|   { | |||
|     GetSystemTimeAsFileTime(&ft); | |||
|     /* assemble the 64-bit tick count from its two 32-bit halves */ | |||
|     tmpres = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime; | |||
|     /*converting file time to unix epoch*/ | |||
|     tmpres /= 10; /*convert into microseconds*/ | |||
|     tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||
|     tv->tv_sec = (long)(tmpres / 1000000UL); | |||
|     tv->tv_usec = (long)(tmpres % 1000000UL); | |||
|   } | |||
|   return 0; | |||
| } | |||
| #endif | |||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||
| /* Fallback flag value for headers that do not provide SHM_HUGETLB. */ | |||
| #ifndef SHM_HUGETLB | |||
| #define SHM_HUGETLB 04000 | |||
| #endif | |||
| /* | |||
|  * malloc() replacement backed by a private SysV shared-memory segment | |||
|  * requested with SHM_HUGETLB. The segment is marked for removal right | |||
|  * after it has been attached. Prints a message and exits with status 1 | |||
|  * when shmget() or shmat() fails. | |||
|  * The trailing "&& 0" in the guard above currently compiles this out. | |||
|  * NOTE(review): the length passed to shmget adds a full HUGE_PAGESIZE | |||
|  * before masking, so an already aligned size grows by one page - confirm | |||
|  * whether that is intended. | |||
|  */ | |||
| static void *huge_malloc(BLASLONG size){ | |||
|   void *mem; | |||
|   int seg_id; | |||
|   seg_id = shmget(IPC_PRIVATE, | |||
|                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||
|                   SHM_HUGETLB | IPC_CREAT |0600); | |||
|   if (seg_id < 0) { | |||
|     printf( "Memory allocation failed(shmget).\n"); | |||
|     exit(1); | |||
|   } | |||
|   mem = shmat(seg_id, NULL, SHM_RND); | |||
|   if ((BLASLONG)mem == -1){ | |||
|     printf( "Memory allocation failed(shmat).\n"); | |||
|     exit(1); | |||
|   } | |||
|   shmctl(seg_id, IPC_RMID, 0); | |||
|   return mem; | |||
| } | |||
| #define malloc huge_malloc | |||
| #endif | |||
| /* | |||
|  * TRMM benchmark driver. | |||
|  * Command line: [from [to [step]]] - matrix orders to time (defaults 1, 200, 1). | |||
|  * Environment:  OPENBLAS_SIDE, OPENBLAS_UPLO, OPENBLAS_TRANS and OPENBLAS_DIAG | |||
|  *               override the first character passed as side ('L'), uplo ('U'), | |||
|  *               trans ('N') and diag ('U'). | |||
|  * For every order m the two m x m operands are refilled with values in | |||
|  * [-0.5, 0.5], one TRMM call is timed with gettimeofday() and the rate is | |||
|  * printed to stderr. | |||
|  * Returns 0; exits with status 1 on allocation failure or a non-positive step. | |||
|  */ | |||
| int MAIN__(int argc, char *argv[]){ | |||
|   FLOAT *a, *b; | |||
|   FLOAT alpha[] = {1.0, 1.0}; | |||
|   char *p; | |||
|   char side ='L'; | |||
|   char uplo ='U'; | |||
|   char trans='N'; | |||
|   char diag ='U'; | |||
|   blasint m, i, j; | |||
|   int from = 1; | |||
|   int to = 200; | |||
|   int step = 1; | |||
|   struct timeval start, stop; | |||
|   double time1; | |||
|   if ((p = getenv("OPENBLAS_SIDE"))) side=*p; | |||
|   if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
|   if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||
|   if ((p = getenv("OPENBLAS_DIAG"))) diag=*p; | |||
|   argc--;argv++; | |||
|   if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
|   if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
|   if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
|   /* with step <= 0 the size loop below would never move past `to` */ | |||
|   if (step < 1) { | |||
|     fprintf(stderr,"Step must be a positive integer.\n");exit(1); | |||
|   } | |||
|   fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag); | |||
|   /* buffers are sized once for the largest order and reused for every m */ | |||
|   if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
|   } | |||
|   if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
|   } | |||
| #ifdef linux | |||
|   srandom(getpid()); | |||
| #endif | |||
|   fprintf(stderr, " SIZE Flops\n"); | |||
|   for(m = from; m <= to; m += step) | |||
|   { | |||
|     fprintf(stderr, " %6d : ", (int)m); | |||
|     for(j = 0; j < m; j++){ | |||
|       for(i = 0; i < m * COMPSIZE; i++){ | |||
|         a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
|         b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
|       } | |||
|     } | |||
|     gettimeofday( &start, (struct timezone *)0); | |||
|     TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); | |||
|     gettimeofday( &stop, (struct timezone *)0); | |||
|     /* elapsed seconds of the single call above */ | |||
|     time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
|     fprintf(stderr, | |||
|             " %10.2f MFlops\n", | |||
|             COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||
|   } | |||
|   return 0; | |||
| } | |||
| /* weak alias so the driver also links as a plain C program; MAIN__ returns int */ | |||
| int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||
| @@ -0,0 +1,202 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #ifdef __CYGWIN32__ | |||
| #include <sys/time.h> | |||
| #endif | |||
| #include "common.h" | |||
| /* Bind TRSM to the BLAS entry point for this build: the s/d prefix follows | |||
|    DOUBLE and the c/z prefix is used instead when COMPLEX is defined. */ | |||
| #undef TRSM | |||
| #if defined(COMPLEX) && defined(DOUBLE) | |||
| #define TRSM BLASFUNC(ztrsm) | |||
| #elif defined(COMPLEX) | |||
| #define TRSM BLASFUNC(ctrsm) | |||
| #elif defined(DOUBLE) | |||
| #define TRSM BLASFUNC(dtrsm) | |||
| #else | |||
| #define TRSM BLASFUNC(strsm) | |||
| #endif | |||
| #if defined(__WIN32__) || defined(__WIN64__) | |||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||
| #endif | |||
| /* | |||
|  * Windows-only stand-in for gettimeofday(). | |||
|  * Reads the current system FILETIME, converts it to microseconds, | |||
|  * subtracts DELTA_EPOCH_IN_MICROSECS and splits the result into | |||
|  * tv->tv_sec and tv->tv_usec. | |||
|  *   tv : destination; nothing is written when it is NULL. | |||
|  *   tz : ignored. | |||
|  * Always returns 0. | |||
|  */ | |||
| int gettimeofday(struct timeval *tv, void *tz){ | |||
|   FILETIME ft; | |||
|   unsigned __int64 tmpres; | |||
|   (void)tz; /* the time zone argument is never consulted */ | |||
|   if (NULL != tv) | |||
|   { | |||
|     GetSystemTimeAsFileTime(&ft); | |||
|     /* assemble the 64-bit tick count from its two 32-bit halves */ | |||
|     tmpres = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime; | |||
|     /*converting file time to unix epoch*/ | |||
|     tmpres /= 10; /*convert into microseconds*/ | |||
|     tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||
|     tv->tv_sec = (long)(tmpres / 1000000UL); | |||
|     tv->tv_usec = (long)(tmpres % 1000000UL); | |||
|   } | |||
|   return 0; | |||
| } | |||
| #endif | |||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||
| /* Fallback flag value for headers that do not provide SHM_HUGETLB. */ | |||
| #ifndef SHM_HUGETLB | |||
| #define SHM_HUGETLB 04000 | |||
| #endif | |||
| /* | |||
|  * malloc() replacement backed by a private SysV shared-memory segment | |||
|  * requested with SHM_HUGETLB. The segment is marked for removal right | |||
|  * after it has been attached. Prints a message and exits with status 1 | |||
|  * when shmget() or shmat() fails. | |||
|  * The trailing "&& 0" in the guard above currently compiles this out. | |||
|  * NOTE(review): the length passed to shmget adds a full HUGE_PAGESIZE | |||
|  * before masking, so an already aligned size grows by one page - confirm | |||
|  * whether that is intended. | |||
|  */ | |||
| static void *huge_malloc(BLASLONG size){ | |||
|   void *mem; | |||
|   int seg_id; | |||
|   seg_id = shmget(IPC_PRIVATE, | |||
|                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||
|                   SHM_HUGETLB | IPC_CREAT |0600); | |||
|   if (seg_id < 0) { | |||
|     printf( "Memory allocation failed(shmget).\n"); | |||
|     exit(1); | |||
|   } | |||
|   mem = shmat(seg_id, NULL, SHM_RND); | |||
|   if ((BLASLONG)mem == -1){ | |||
|     printf( "Memory allocation failed(shmat).\n"); | |||
|     exit(1); | |||
|   } | |||
|   shmctl(seg_id, IPC_RMID, 0); | |||
|   return mem; | |||
| } | |||
| #define malloc huge_malloc | |||
| #endif | |||
| /* | |||
|  * TRSM benchmark driver. | |||
|  * Command line: [from [to [step]]] - matrix orders to time (defaults 1, 200, 1). | |||
|  * Environment:  OPENBLAS_SIDE, OPENBLAS_UPLO, OPENBLAS_TRANS and OPENBLAS_DIAG | |||
|  *               override the first character passed as side ('L'), uplo ('U'), | |||
|  *               trans ('N') and diag ('U'). | |||
|  * For every order m the two m x m operands are refilled with values in | |||
|  * [-0.5, 0.5], one TRSM call is timed with gettimeofday() and the rate is | |||
|  * printed to stderr. | |||
|  * Returns 0; exits with status 1 on allocation failure or a non-positive step. | |||
|  */ | |||
| int MAIN__(int argc, char *argv[]){ | |||
|   FLOAT *a, *b; | |||
|   FLOAT alpha[] = {1.0, 1.0}; | |||
|   char *p; | |||
|   char side ='L'; | |||
|   char uplo ='U'; | |||
|   char trans='N'; | |||
|   char diag ='U'; | |||
|   blasint m, i, j; | |||
|   int from = 1; | |||
|   int to = 200; | |||
|   int step = 1; | |||
|   struct timeval start, stop; | |||
|   double time1; | |||
|   if ((p = getenv("OPENBLAS_SIDE"))) side=*p; | |||
|   if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
|   if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||
|   if ((p = getenv("OPENBLAS_DIAG"))) diag=*p; | |||
|   argc--;argv++; | |||
|   if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
|   if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
|   if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
|   /* with step <= 0 the size loop below would never move past `to` */ | |||
|   if (step < 1) { | |||
|     fprintf(stderr,"Step must be a positive integer.\n");exit(1); | |||
|   } | |||
|   fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag); | |||
|   /* buffers are sized once for the largest order and reused for every m */ | |||
|   if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
|   } | |||
|   if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
|     fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
|   } | |||
| #ifdef linux | |||
|   srandom(getpid()); | |||
| #endif | |||
|   fprintf(stderr, " SIZE Flops\n"); | |||
|   for(m = from; m <= to; m += step) | |||
|   { | |||
|     fprintf(stderr, " %6d : ", (int)m); | |||
|     for(j = 0; j < m; j++){ | |||
|       for(i = 0; i < m * COMPSIZE; i++){ | |||
|         a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
|         b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
|       } | |||
|     } | |||
|     gettimeofday( &start, (struct timezone *)0); | |||
|     TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); | |||
|     gettimeofday( &stop, (struct timezone *)0); | |||
|     /* elapsed seconds of the single call above */ | |||
|     time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
|     fprintf(stderr, | |||
|             " %10.2f MFlops\n", | |||
|             COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||
|   } | |||
|   return 0; | |||
| } | |||
| /* weak alias so the driver also links as a plain C program; MAIN__ returns int */ | |||
| int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||
| @@ -1062,7 +1062,11 @@ int get_cpuname(void){ | |||
| case 12: | |||
| case 15: | |||
| if(support_avx()) | |||
| #ifndef NO_AVX2 | |||
| return CPUTYPE_HASWELL; | |||
| #else | |||
| return CPUTYPE_SANDYBRIDGE; | |||
| #endif | |||
| else | |||
| return CPUTYPE_NEHALEM; | |||
| } | |||
| @@ -1072,7 +1076,11 @@ int get_cpuname(void){ | |||
| case 5: | |||
| case 6: | |||
| if(support_avx()) | |||
| #ifndef NO_AVX2 | |||
| return CPUTYPE_HASWELL; | |||
| #else | |||
| return CPUTYPE_SANDYBRIDGE; | |||
| #endif | |||
| else | |||
| return CPUTYPE_NEHALEM; | |||
| } | |||
| @@ -1471,7 +1479,11 @@ int get_coretype(void){ | |||
| case 12: | |||
| case 15: | |||
| if(support_avx()) | |||
| #ifndef NO_AVX2 | |||
| return CORE_HASWELL; | |||
| #else | |||
| return CORE_SANDYBRIDGE; | |||
| #endif | |||
| else | |||
| return CORE_NEHALEM; | |||
| } | |||
| @@ -1481,7 +1493,11 @@ int get_coretype(void){ | |||
| case 5: | |||
| case 6: | |||
| if(support_avx()) | |||
| #ifndef NO_AVX2 | |||
| return CORE_HASWELL; | |||
| #else | |||
| return CORE_SANDYBRIDGE; | |||
| #endif | |||
| else | |||
| return CORE_NEHALEM; | |||
| } | |||
| @@ -66,7 +66,11 @@ extern gotoblas_t gotoblas_BOBCAT; | |||
| extern gotoblas_t gotoblas_SANDYBRIDGE; | |||
| extern gotoblas_t gotoblas_BULLDOZER; | |||
| extern gotoblas_t gotoblas_PILEDRIVER; | |||
| #ifdef NO_AVX2 | |||
| #define gotoblas_HASWELL gotoblas_SANDYBRIDGE | |||
| #else | |||
| extern gotoblas_t gotoblas_HASWELL; | |||
| #endif | |||
| #else | |||
| //Use NEHALEM kernels for sandy bridge | |||
| #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | |||
| @@ -356,25 +356,25 @@ ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS) | |||
| XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) | |||
| #SLAPACKOBJS = \ | |||
| # sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \ | |||
| # spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \ | |||
| # slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \ | |||
| # sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ | |||
| # spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \ | |||
| # slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX) | |||
| SLAPACKOBJS = \ | |||
| sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ | |||
| spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \ | |||
| slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX) | |||
| slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) | |||
| #DLAPACKOBJS = \ | |||
| # dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \ | |||
| # dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \ | |||
| # dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \ | |||
| # dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \ | |||
| # dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \ | |||
| # dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX) | |||
| DLAPACKOBJS = \ | |||
| dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \ | |||
| dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \ | |||
| dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX) | |||
| dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) | |||
| QLAPACKOBJS = \ | |||
| @@ -382,28 +382,29 @@ QLAPACKOBJS = \ | |||
| qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \ | |||
| qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \ | |||
| #CLAPACKOBJS = \ | |||
| # cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \ | |||
| # cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \ | |||
| # claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \ | |||
| # cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ | |||
| # cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ | |||
| # clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX) | |||
| CLAPACKOBJS = \ | |||
| cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ | |||
| cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ | |||
| clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX) | |||
| clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) | |||
| #ZLAPACKOBJS = \ | |||
| # zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \ | |||
| # zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \ | |||
| # zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \ | |||
| # zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ | |||
| # zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ | |||
| # zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX) | |||
| ZLAPACKOBJS = \ | |||
| zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ | |||
| zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ | |||
| zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX) | |||
| zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) | |||
| XLAPACKOBJS = \ | |||
| @@ -1,5 +1,7 @@ | |||
| SGEMVNKERNEL = ../arm/gemv_n.c | |||
| SGEMVTKERNEL = ../arm/gemv_t.c | |||
| CGEMVNKERNEL = ../arm/zgemv_n.c | |||
| CGEMVTKERNEL = ../arm/zgemv_t.c | |||
| DGEMVNKERNEL = ../arm/gemv_n.c | |||
| DGEMVTKERNEL = ../arm/gemv_t.c | |||
| @@ -96,12 +98,12 @@ ZSWAPKERNEL = swap_vfp.S | |||
| # BAD SGEMVNKERNEL = gemv_n_vfp.S | |||
| # BAD DGEMVNKERNEL = gemv_n_vfp.S | |||
| CGEMVNKERNEL = cgemv_n_vfp.S | |||
| # CGEMVNKERNEL = cgemv_n_vfp.S | |||
| ZGEMVNKERNEL = zgemv_n_vfp.S | |||
| # BAD SGEMVTKERNEL = gemv_t_vfp.S | |||
| # BAD DGEMVTKERNEL = gemv_t_vfp.S | |||
| CGEMVTKERNEL = cgemv_t_vfp.S | |||
| # CGEMVTKERNEL = cgemv_t_vfp.S | |||
| ZGEMVTKERNEL = zgemv_t_vfp.S | |||
| STRMMKERNEL = strmm_kernel_4x2_vfp.S | |||
| @@ -1,5 +1,7 @@ | |||
| SGEMVNKERNEL = ../arm/gemv_n.c | |||
| SGEMVTKERNEL = ../arm/gemv_t.c | |||
| CGEMVNKERNEL = ../arm/zgemv_n.c | |||
| CGEMVTKERNEL = ../arm/zgemv_t.c | |||
| ################################################################################# | |||
| @@ -77,12 +79,12 @@ ZSCALKERNEL = zscal.c | |||
| # BAD SGEMVNKERNEL = gemv_n_vfp.S | |||
| DGEMVNKERNEL = gemv_n_vfp.S | |||
| CGEMVNKERNEL = cgemv_n_vfp.S | |||
| #CGEMVNKERNEL = cgemv_n_vfp.S | |||
| ZGEMVNKERNEL = zgemv_n_vfp.S | |||
| # BAD SGEMVTKERNEL = gemv_t_vfp.S | |||
| DGEMVTKERNEL = gemv_t_vfp.S | |||
| CGEMVTKERNEL = cgemv_t_vfp.S | |||
| #CGEMVTKERNEL = cgemv_t_vfp.S | |||
| ZGEMVTKERNEL = zgemv_t_vfp.S | |||
| STRMMKERNEL = strmm_kernel_4x4_vfpv3.S | |||
| @@ -1,263 +1,3 @@ | |||
| GEMVDEP = ../l2param.h | |||
| ifdef HAVE_SSE | |||
| ifndef SAMAXKERNEL | |||
| SAMAXKERNEL = amax_sse.S | |||
| endif | |||
| ifndef CAMAXKERNEL | |||
| CAMAXKERNEL = zamax_sse.S | |||
| endif | |||
| ifndef SAMINKERNEL | |||
| SAMINKERNEL = amax_sse.S | |||
| endif | |||
| ifndef CAMINKERNEL | |||
| CAMINKERNEL = zamax_sse.S | |||
| endif | |||
| ifndef ISAMAXKERNEL | |||
| ISAMAXKERNEL = iamax_sse.S | |||
| endif | |||
| ifndef ICAMAXKERNEL | |||
| ICAMAXKERNEL = izamax_sse.S | |||
| endif | |||
| ifndef ISAMINKERNEL | |||
| ISAMINKERNEL = iamax_sse.S | |||
| endif | |||
| ifndef ICAMINKERNEL | |||
| ICAMINKERNEL = izamax_sse.S | |||
| endif | |||
| ifndef ISMAXKERNEL | |||
| ISMAXKERNEL = iamax_sse.S | |||
| endif | |||
| ifndef ISMINKERNEL | |||
| ISMINKERNEL = iamax_sse.S | |||
| endif | |||
| ifndef SMAXKERNEL | |||
| SMAXKERNEL = amax_sse.S | |||
| endif | |||
| ifndef SMINKERNEL | |||
| SMINKERNEL = amax_sse.S | |||
| endif | |||
| ifndef SASUMKERNEL | |||
| SASUMKERNEL = asum_sse.S | |||
| endif | |||
| ifndef CASUMKERNEL | |||
| CASUMKERNEL = zasum_sse.S | |||
| endif | |||
| ifndef SDOTKERNEL | |||
| SDOTKERNEL = ../arm/dot.c | |||
| endif | |||
| ifndef CDOTKERNEL | |||
| CDOTKERNEL = zdot_sse.S | |||
| endif | |||
| ifndef SCOPYKERNEL | |||
| SCOPYKERNEL = copy_sse.S | |||
| endif | |||
| ifndef CCOPYKERNEL | |||
| CCOPYKERNEL = zcopy_sse.S | |||
| endif | |||
| ifndef SSACALKERNEL | |||
| SSCALKERNEL = scal_sse.S | |||
| endif | |||
| ifndef CSACALKERNEL | |||
| CSCALKERNEL = zscal_sse.S | |||
| endif | |||
| ifndef SAXPYKERNEL | |||
| SAXPYKERNEL = axpy_sse.S | |||
| endif | |||
| ifndef CAXPYKERNEL | |||
| CAXPYKERNEL = zaxpy_sse.S | |||
| endif | |||
| ifndef SROTKERNEL | |||
| SROTKERNEL = rot_sse.S | |||
| endif | |||
| ifndef CROTKERNEL | |||
| CROTKERNEL = zrot_sse.S | |||
| endif | |||
| ifndef SSWAPKERNEL | |||
| SSWAPKERNEL = swap_sse.S | |||
| endif | |||
| ifndef CSWAPKERNEL | |||
| CSWAPKERNEL = zswap_sse.S | |||
| endif | |||
| ifndef SGEMVNKERNEL | |||
| SGEMVNKERNEL = ../arm/gemv_n.c | |||
| endif | |||
| ifndef SGEMVTKERNEL | |||
| SGEMVTKERNEL = ../arm/gemv_t.c | |||
| endif | |||
| ifndef CGEMVNKERNEL | |||
| CGEMVNKERNEL = zgemv_n_sse.S | |||
| endif | |||
| ifndef CGEMVTKERNEL | |||
| CGEMVTKERNEL = zgemv_t_sse.S | |||
| endif | |||
| endif | |||
| ifdef HAVE_SSE2 | |||
| ifndef DAMAXKERNEL | |||
| DAMAXKERNEL = amax_sse2.S | |||
| endif | |||
| ifndef ZAMAXKERNEL | |||
| ZAMAXKERNEL = zamax_sse2.S | |||
| endif | |||
| ifndef DAMINKERNEL | |||
| DAMINKERNEL = amax_sse2.S | |||
| endif | |||
| ifndef ZAMINKERNEL | |||
| ZAMINKERNEL = zamax_sse2.S | |||
| endif | |||
| ifndef IDAMAXKERNEL | |||
| IDAMAXKERNEL = iamax_sse2.S | |||
| endif | |||
| ifndef IZAMAXKERNEL | |||
| IZAMAXKERNEL = izamax_sse2.S | |||
| endif | |||
| ifndef IDAMINKERNEL | |||
| IDAMINKERNEL = iamax_sse2.S | |||
| endif | |||
| ifndef IZAMINKERNEL | |||
| IZAMINKERNEL = izamax_sse2.S | |||
| endif | |||
| ifndef IDMAXKERNEL | |||
| IDMAXKERNEL = iamax_sse2.S | |||
| endif | |||
| ifndef IDMINKERNEL | |||
| IDMINKERNEL = iamax_sse2.S | |||
| endif | |||
| ifndef DMAXKERNEL | |||
| DMAXKERNEL = amax_sse2.S | |||
| endif | |||
| ifndef DMINKERNEL | |||
| DMINKERNEL = amax_sse2.S | |||
| endif | |||
| ifndef DDOTKERNEL | |||
| DDOTKERNEL = dot_sse2.S | |||
| endif | |||
| ifndef ZDOTKERNEL | |||
| ZDOTKERNEL = zdot_sse2.S | |||
| endif | |||
| ifndef DCOPYKERNEL | |||
| # DCOPYKERNEL = copy_sse2.S | |||
| endif | |||
| ifndef ZCOPYKERNEL | |||
| ZCOPYKERNEL = zcopy_sse2.S | |||
| endif | |||
| ifndef DSACALKERNEL | |||
| DSCALKERNEL = scal_sse2.S | |||
| endif | |||
| ifndef ZSACALKERNEL | |||
| ZSCALKERNEL = zscal_sse2.S | |||
| endif | |||
| ifndef DASUMKERNEL | |||
| DASUMKERNEL = asum_sse2.S | |||
| endif | |||
| ifndef ZASUMKERNEL | |||
| ZASUMKERNEL = zasum_sse2.S | |||
| endif | |||
| ifndef DAXPYKERNEL | |||
| DAXPYKERNEL = axpy_sse2.S | |||
| endif | |||
| ifndef ZAXPYKERNEL | |||
| ZAXPYKERNEL = zaxpy_sse2.S | |||
| endif | |||
| ifndef SNRM2KERNEL | |||
| SNRM2KERNEL = nrm2_sse.S | |||
| endif | |||
| ifndef CNRM2KERNEL | |||
| CNRM2KERNEL = znrm2_sse.S | |||
| endif | |||
| ifndef DROTKERNEL | |||
| DROTKERNEL = rot_sse2.S | |||
| endif | |||
| ifndef ZROTKERNEL | |||
| ZROTKERNEL = zrot_sse2.S | |||
| endif | |||
| ifndef DSWAPKERNEL | |||
| DSWAPKERNEL = swap_sse2.S | |||
| endif | |||
| ifndef ZSWAPKERNEL | |||
| ZSWAPKERNEL = zswap_sse2.S | |||
| endif | |||
| ifndef DGEMVNKERNEL | |||
| DGEMVNKERNEL = gemv_n_sse2.S | |||
| endif | |||
| ifndef DGEMVTKERNEL | |||
| DGEMVTKERNEL = gemv_t_sse2.S | |||
| endif | |||
| ifndef ZGEMVNKERNEL | |||
| ZGEMVNKERNEL = zgemv_n_sse2.S | |||
| endif | |||
| ifndef ZGEMVTKERNEL | |||
| ZGEMVTKERNEL = zgemv_t_sse2.S | |||
| endif | |||
| endif | |||
| ifndef SAMINKERNEL | |||
| SAMINKERNEL = amax.S | |||
| endif | |||
| @@ -394,21 +134,41 @@ XGEMMITCOPYOBJ = | |||
| XGEMMONCOPYOBJ = xgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| XGEMMOTCOPYOBJ = xgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMM_BETA = gemm_beta.S | |||
| DGEMM_BETA = gemm_beta.S | |||
| QGEMM_BETA = ../generic/gemm_beta.c | |||
| CGEMM_BETA = zgemm_beta.S | |||
| ZGEMM_BETA = zgemm_beta.S | |||
| XGEMM_BETA = ../generic/zgemm_beta.c | |||
| QTRSMKERNEL_LN = qtrsm_kernel_LN_2x2.S | |||
| QTRSMKERNEL_LT = qtrsm_kernel_LT_2x2.S | |||
| QTRSMKERNEL_RN = qtrsm_kernel_LT_2x2.S | |||
| QTRSMKERNEL_RT = qtrsm_kernel_RT_2x2.S | |||
| QTRSMKERNEL_LN = qtrsm_kernel_LN_2x2.S | |||
| QTRSMKERNEL_LT = qtrsm_kernel_LT_2x2.S | |||
| QTRSMKERNEL_RN = qtrsm_kernel_LT_2x2.S | |||
| QTRSMKERNEL_RT = qtrsm_kernel_RT_2x2.S | |||
| XTRSMKERNEL_LN = xtrsm_kernel_LT_1x1.S | |||
| XTRSMKERNEL_LT = xtrsm_kernel_LT_1x1.S | |||
| XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S | |||
| XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S | |||
| XTRSMKERNEL_LN = xtrsm_kernel_LT_1x1.S | |||
| XTRSMKERNEL_LT = xtrsm_kernel_LT_1x1.S | |||
| XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S | |||
| XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S | |||
| XGEMM3MKERNEL = xgemm3m_kernel_2x2.S | |||
| # bug in zdot assembler kernel | |||
| ifndef ZDOTKERNEL | |||
| ZDOTKERNEL = ../arm/zdot.c | |||
| endif | |||
| DSDOTKERNEL = ../arm/dot.c | |||
| # Bug in znrm2 assembler kernel | |||
| ifndef ZNRM2KERNEL | |||
| ZNRM2KERNEL = ../arm/znrm2.c | |||
| endif | |||
| # Bug in zgemv_t assembler kernel | |||
| ifndef ZGEMVTKERNEL | |||
| ZGEMVTKERNEL = ../arm/zgemv_t.c | |||
| endif | |||
| SGEMM_BETA = ../generic/gemm_beta.c | |||
| DGEMM_BETA = ../generic/gemm_beta.c | |||
| CGEMM_BETA = ../generic/zgemm_beta.c | |||
| ZGEMM_BETA = ../generic/zgemm_beta.c | |||
| QGEMM_BETA = ../generic/gemm_beta.c | |||
| XGEMM_BETA = ../generic/zgemm_beta.c | |||
| @@ -1,6 +1,3 @@ | |||
| SGEMVNKERNEL = sgemv_n.S | |||
| SGEMVTKERNEL = sgemv_t.S | |||
| ZGEMVNKERNEL = zgemv_n_dup.S | |||
| ZGEMVTKERNEL = zgemv_t.S | |||
| @@ -1,5 +1,3 @@ | |||
| SGEMVNKERNEL = sgemv_n.S | |||
| SGEMVTKERNEL = sgemv_t.S | |||
| ZGEMVNKERNEL = zgemv_n_dup.S | |||
| ZGEMVTKERNEL = zgemv_t.S | |||
| @@ -1,6 +1,3 @@ | |||
| SGEMVNKERNEL = sgemv_n.S | |||
| SGEMVTKERNEL = sgemv_t.S | |||
| SGEMMKERNEL = sgemm_kernel_16x4_haswell.S | |||
| SGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||
| @@ -1,6 +1,3 @@ | |||
| SGEMVNKERNEL = sgemv_n.S | |||
| SGEMVTKERNEL = sgemv_t.S | |||
| SGEMMKERNEL = gemm_kernel_4x8_nehalem.S | |||
| SGEMMINCOPY = gemm_ncopy_4.S | |||
| @@ -1,5 +1,3 @@ | |||
| SGEMVNKERNEL = sgemv_n.S | |||
| SGEMVTKERNEL = sgemv_t.S | |||
| ZGEMVNKERNEL = zgemv_n_dup.S | |||
| ZGEMVTKERNEL = zgemv_t.S | |||
| @@ -1,5 +1,3 @@ | |||
| SGEMVNKERNEL = sgemv_n.S | |||
| SGEMVTKERNEL = sgemv_t.S | |||
| SGEMMKERNEL = sgemm_kernel_16x4_sandy.S | |||
| SGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||
| @@ -155,7 +155,7 @@ SLASRC = \ | |||
| sbbcsd.o slapmr.o sorbdb.o sorbdb1.o sorbdb2.o sorbdb3.o sorbdb4.o \ | |||
| sorbdb5.o sorbdb6.o sorcsd.o sorcsd2by1.o \ | |||
| sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \ | |||
| stpqrt.o stpqrt2.o stpmqrt.o stprfb.o | |||
| stpqrt.o stpqrt2.o stpmqrt.o stprfb.o spotri.o | |||
| DSLASRC = spotrs.o | |||
| @@ -236,7 +236,7 @@ CLASRC = \ | |||
| cbbcsd.o clapmr.o cunbdb.o cunbdb1.o cunbdb2.o cunbdb3.o cunbdb4.o \ | |||
| cunbdb5.o cunbdb6.o cuncsd.o cuncsd2by1.o \ | |||
| cgeqrt.o cgeqrt2.o cgeqrt3.o cgemqrt.o \ | |||
| ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o | |||
| ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o cpotri.o | |||
| ifdef USEXBLAS | |||
| CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \ | |||
| @@ -316,7 +316,7 @@ DLASRC = \ | |||
| dbbcsd.o dlapmr.o dorbdb.o dorbdb1.o dorbdb2.o dorbdb3.o dorbdb4.o \ | |||
| dorbdb5.o dorbdb6.o dorcsd.o dorcsd2by1.o \ | |||
| dgeqrt.o dgeqrt2.o dgeqrt3.o dgemqrt.o \ | |||
| dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o | |||
| dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o dpotri.o | |||
| ifdef USEXBLAS | |||
| DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ | |||
| @@ -400,7 +400,7 @@ ZLASRC = \ | |||
| zbbcsd.o zlapmr.o zunbdb.o zunbdb1.o zunbdb2.o zunbdb3.o zunbdb4.o \ | |||
| zunbdb5.o zunbdb6.o zuncsd.o zuncsd2by1.o \ | |||
| zgeqrt.o zgeqrt2.o zgeqrt3.o zgemqrt.o \ | |||
| ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o | |||
| ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o zpotri.o | |||
| ifdef USEXBLAS | |||
| ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \ | |||
| @@ -321,24 +321,24 @@ | |||
| * | |||
| MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) | |||
| * Compute space needed for CGEQRF | |||
| CALL CGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_CGEQRF=DUM(1) | |||
| CALL CGEQRF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_CGEQRF=CDUM(1) | |||
| * Compute space needed for CUNGQR | |||
| CALL CUNGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_CUNGQR_N=DUM(1) | |||
| CALL CUNGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_CUNGQR_M=DUM(1) | |||
| CALL CUNGQR( M, N, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_CUNGQR_N=CDUM(1) | |||
| CALL CUNGQR( M, M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_CUNGQR_M=CDUM(1) | |||
| * Compute space needed for CGEBRD | |||
| CALL CGEBRD( N, N, A, LDA, S, DUM(1), DUM(1), | |||
| $ DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_CGEBRD=DUM(1) | |||
| CALL CGEBRD( N, N, A, LDA, S, DUM(1), CDUM(1), | |||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_CGEBRD=CDUM(1) | |||
| * Compute space needed for CUNGBR | |||
| CALL CUNGBR( 'P', N, N, N, A, LDA, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_P=DUM(1) | |||
| CALL CUNGBR( 'Q', N, N, N, A, LDA, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_Q=DUM(1) | |||
| CALL CUNGBR( 'P', N, N, N, A, LDA, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_P=CDUM(1) | |||
| CALL CUNGBR( 'Q', N, N, N, A, LDA, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_Q=CDUM(1) | |||
| * | |||
| MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) | |||
| IF( M.GE.MNTHR ) THEN | |||
| @@ -444,20 +444,20 @@ | |||
| * | |||
| * Path 10 (M at least N, but not much larger) | |||
| * | |||
| CALL CGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), | |||
| $ DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_CGEBRD=DUM(1) | |||
| CALL CGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1), | |||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_CGEBRD=CDUM(1) | |||
| MAXWRK = 2*N + LWORK_CGEBRD | |||
| IF( WNTUS .OR. WNTUO ) THEN | |||
| CALL CUNGBR( 'Q', M, N, N, A, LDA, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_Q=DUM(1) | |||
| CALL CUNGBR( 'Q', M, N, N, A, LDA, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_Q=CDUM(1) | |||
| MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_Q ) | |||
| END IF | |||
| IF( WNTUA ) THEN | |||
| CALL CUNGBR( 'Q', M, M, N, A, LDA, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_Q=DUM(1) | |||
| CALL CUNGBR( 'Q', M, M, N, A, LDA, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_Q=CDUM(1) | |||
| MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_Q ) | |||
| END IF | |||
| IF( .NOT.WNTVN ) THEN | |||
| @@ -471,25 +471,26 @@ | |||
| * | |||
| MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) | |||
| * Compute space needed for CGELQF | |||
| CALL CGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_CGELQF=DUM(1) | |||
| CALL CGELQF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_CGELQF=CDUM(1) | |||
| * Compute space needed for CUNGLQ | |||
| CALL CUNGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_CUNGLQ_N=DUM(1) | |||
| CALL CUNGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_CUNGLQ_M=DUM(1) | |||
| CALL CUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), -1, | |||
| $ IERR ) | |||
| LWORK_CUNGLQ_N=CDUM(1) | |||
| CALL CUNGLQ( M, N, M, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_CUNGLQ_M=CDUM(1) | |||
| * Compute space needed for CGEBRD | |||
| CALL CGEBRD( M, M, A, LDA, S, DUM(1), DUM(1), | |||
| $ DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_CGEBRD=DUM(1) | |||
| CALL CGEBRD( M, M, A, LDA, S, DUM(1), CDUM(1), | |||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_CGEBRD=CDUM(1) | |||
| * Compute space needed for CUNGBR P | |||
| CALL CUNGBR( 'P', M, M, M, A, N, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_P=DUM(1) | |||
| CALL CUNGBR( 'P', M, M, M, A, N, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_P=CDUM(1) | |||
| * Compute space needed for CUNGBR Q | |||
| CALL CUNGBR( 'Q', M, M, M, A, N, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_Q=DUM(1) | |||
| CALL CUNGBR( 'Q', M, M, M, A, N, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_Q=CDUM(1) | |||
| IF( N.GE.MNTHR ) THEN | |||
| IF( WNTVN ) THEN | |||
| * | |||
| @@ -593,21 +594,21 @@ | |||
| * | |||
| * Path 10t(N greater than M, but not much larger) | |||
| * | |||
| CALL CGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), | |||
| $ DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_CGEBRD=DUM(1) | |||
| CALL CGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1), | |||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_CGEBRD=CDUM(1) | |||
| MAXWRK = 2*M + LWORK_CGEBRD | |||
| IF( WNTVS .OR. WNTVO ) THEN | |||
| * Compute space needed for CUNGBR P | |||
| CALL CUNGBR( 'P', M, N, M, A, N, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_P=DUM(1) | |||
| CALL CUNGBR( 'P', M, N, M, A, N, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_P=CDUM(1) | |||
| MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_P ) | |||
| END IF | |||
| IF( WNTVA ) THEN | |||
| CALL CUNGBR( 'P', N, N, M, A, N, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_P=DUM(1) | |||
| CALL CUNGBR( 'P', N, N, M, A, N, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_CUNGBR_P=CDUM(1) | |||
| MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_P ) | |||
| END IF | |||
| IF( .NOT.WNTUN ) THEN | |||
| @@ -286,7 +286,7 @@ | |||
| CLANHF = ZERO | |||
| RETURN | |||
| ELSE IF( N.EQ.1 ) THEN | |||
| CLANHF = ABS( A(0) ) | |||
| CLANHF = ABS(REAL(A(0))) | |||
| RETURN | |||
| END IF | |||
| * | |||
| @@ -321,24 +321,24 @@ | |||
| * | |||
| MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 ) | |||
| * Compute space needed for ZGEQRF | |||
| CALL ZGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_ZGEQRF=DUM(1) | |||
| CALL ZGEQRF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_ZGEQRF=CDUM(1) | |||
| * Compute space needed for ZUNGQR | |||
| CALL ZUNGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_ZUNGQR_N=DUM(1) | |||
| CALL ZUNGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_ZUNGQR_M=DUM(1) | |||
| CALL ZUNGQR( M, N, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_ZUNGQR_N=CDUM(1) | |||
| CALL ZUNGQR( M, M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_ZUNGQR_M=CDUM(1) | |||
| * Compute space needed for ZGEBRD | |||
| CALL ZGEBRD( N, N, A, LDA, S, DUM(1), DUM(1), | |||
| $ DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_ZGEBRD=DUM(1) | |||
| CALL ZGEBRD( N, N, A, LDA, S, DUM(1), CDUM(1), | |||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_ZGEBRD=CDUM(1) | |||
| * Compute space needed for ZUNGBR | |||
| CALL ZUNGBR( 'P', N, N, N, A, LDA, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_P=DUM(1) | |||
| CALL ZUNGBR( 'Q', N, N, N, A, LDA, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_Q=DUM(1) | |||
| CALL ZUNGBR( 'P', N, N, N, A, LDA, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_P=CDUM(1) | |||
| CALL ZUNGBR( 'Q', N, N, N, A, LDA, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_Q=CDUM(1) | |||
| * | |||
| IF( M.GE.MNTHR ) THEN | |||
| IF( WNTUN ) THEN | |||
| @@ -443,20 +443,20 @@ | |||
| * | |||
| * Path 10 (M at least N, but not much larger) | |||
| * | |||
| CALL ZGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), | |||
| $ DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_ZGEBRD=DUM(1) | |||
| CALL ZGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1), | |||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_ZGEBRD=CDUM(1) | |||
| MAXWRK = 2*N + LWORK_ZGEBRD | |||
| IF( WNTUS .OR. WNTUO ) THEN | |||
| CALL ZUNGBR( 'Q', M, N, N, A, LDA, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_Q=DUM(1) | |||
| CALL ZUNGBR( 'Q', M, N, N, A, LDA, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_Q=CDUM(1) | |||
| MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q ) | |||
| END IF | |||
| IF( WNTUA ) THEN | |||
| CALL ZUNGBR( 'Q', M, M, N, A, LDA, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_Q=DUM(1) | |||
| CALL ZUNGBR( 'Q', M, M, N, A, LDA, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_Q=CDUM(1) | |||
| MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q ) | |||
| END IF | |||
| IF( .NOT.WNTVN ) THEN | |||
| @@ -470,25 +470,26 @@ | |||
| * | |||
| MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 ) | |||
| * Compute space needed for ZGELQF | |||
| CALL ZGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_ZGELQF=DUM(1) | |||
| CALL ZGELQF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_ZGELQF=CDUM(1) | |||
| * Compute space needed for ZUNGLQ | |||
| CALL ZUNGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_ZUNGLQ_N=DUM(1) | |||
| CALL ZUNGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_ZUNGLQ_M=DUM(1) | |||
| CALL ZUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), -1, | |||
| $ IERR ) | |||
| LWORK_ZUNGLQ_N=CDUM(1) | |||
| CALL ZUNGLQ( M, N, M, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_ZUNGLQ_M=CDUM(1) | |||
| * Compute space needed for ZGEBRD | |||
| CALL ZGEBRD( M, M, A, LDA, S, DUM(1), DUM(1), | |||
| $ DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_ZGEBRD=DUM(1) | |||
| CALL ZGEBRD( M, M, A, LDA, S, DUM(1), CDUM(1), | |||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_ZGEBRD=CDUM(1) | |||
| * Compute space needed for ZUNGBR P | |||
| CALL ZUNGBR( 'P', M, M, M, A, N, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_P=DUM(1) | |||
| CALL ZUNGBR( 'P', M, M, M, A, N, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_P=CDUM(1) | |||
| * Compute space needed for ZUNGBR Q | |||
| CALL ZUNGBR( 'Q', M, M, M, A, N, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_Q=DUM(1) | |||
| CALL ZUNGBR( 'Q', M, M, M, A, N, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_Q=CDUM(1) | |||
| IF( N.GE.MNTHR ) THEN | |||
| IF( WNTVN ) THEN | |||
| * | |||
| @@ -592,21 +593,21 @@ | |||
| * | |||
| * Path 10t(N greater than M, but not much larger) | |||
| * | |||
| CALL ZGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), | |||
| $ DUM(1), DUM(1), -1, IERR ) | |||
| LWORK_ZGEBRD=DUM(1) | |||
| CALL ZGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1), | |||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||
| LWORK_ZGEBRD=CDUM(1) | |||
| MAXWRK = 2*M + LWORK_ZGEBRD | |||
| IF( WNTVS .OR. WNTVO ) THEN | |||
| * Compute space needed for ZUNGBR P | |||
| CALL ZUNGBR( 'P', M, N, M, A, N, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_P=DUM(1) | |||
| CALL ZUNGBR( 'P', M, N, M, A, N, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_P=CDUM(1) | |||
| MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P ) | |||
| END IF | |||
| IF( WNTVA ) THEN | |||
| CALL ZUNGBR( 'P', N, N, M, A, N, DUM(1), | |||
| $ DUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_P=DUM(1) | |||
| CALL ZUNGBR( 'P', N, N, M, A, N, CDUM(1), | |||
| $ CDUM(1), -1, IERR ) | |||
| LWORK_ZUNGBR_P=CDUM(1) | |||
| MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P ) | |||
| END IF | |||
| IF( .NOT.WNTUN ) THEN | |||
| @@ -286,7 +286,7 @@ | |||
| ZLANHF = ZERO | |||
| RETURN | |||
| ELSE IF( N.EQ.1 ) THEN | |||
| ZLANHF = ABS( A(0) ) | |||
| ZLANHF = ABS(DBLE(A(0))) | |||
| RETURN | |||
| END IF | |||
| * | |||
| @@ -526,10 +526,10 @@ | |||
| IF (SN.NE.ZERO) THEN | |||
| IF (CS.NE.ZERO) THEN | |||
| ISUPPZ(2*M-1) = 1 | |||
| ISUPPZ(2*M-1) = 2 | |||
| ISUPPZ(2*M) = 2 | |||
| ELSE | |||
| ISUPPZ(2*M-1) = 1 | |||
| ISUPPZ(2*M-1) = 1 | |||
| ISUPPZ(2*M) = 1 | |||
| END IF | |||
| ELSE | |||
| ISUPPZ(2*M-1) = 2 | |||
| @@ -550,10 +550,10 @@ | |||
| IF (SN.NE.ZERO) THEN | |||
| IF (CS.NE.ZERO) THEN | |||
| ISUPPZ(2*M-1) = 1 | |||
| ISUPPZ(2*M-1) = 2 | |||
| ISUPPZ(2*M) = 2 | |||
| ELSE | |||
| ISUPPZ(2*M-1) = 1 | |||
| ISUPPZ(2*M-1) = 1 | |||
| ISUPPZ(2*M) = 1 | |||
| END IF | |||
| ELSE | |||
| ISUPPZ(2*M-1) = 2 | |||
| @@ -1,8 +1,8 @@ | |||
| Data file for testing DSGESV/DSPOSV LAPACK routines | |||
| 11 Number of values of M | |||
| 0 1 2 13 17 45 78 91 101 120 132 Values of M (row dimension) | |||
| 12 Number of values of M | |||
| 0 1 2 13 17 45 78 91 101 119 120 132 values of M (row dimension) | |||
| 4 Number of values of NRHS | |||
| 1 2 15 16 Values of NRHS (number of right hand sides) | |||
| 1 2 14 16 Values of NRHS (number of right hand sides) | |||
| 30.0 Threshold value of test ratio | |||
| T Put T to test the driver routine | |||
| T Put T to test the error exits | |||