| @@ -1,4 +1,24 @@ | |||||
| OpenBLAS ChangeLog | OpenBLAS ChangeLog | ||||
| ==================================================================== | |||||
| Version 0.2.10 | |||||
| 16-Jul-2014 | |||||
| common: | |||||
| * Added the following BLAS extensions: | |||||
| s/d/c/zaxpby, s/d/c/zimatcopy, s/d/c/zomatcopy. | |||||
| * Added OPENBLAS_CORETYPE environment for dynamic_arch. (a86d34) | |||||
| * Added NO_AVX2 flag for old binutils. (#401) | |||||
| * Support outputting the CPU core name at runtime. (#407) | |||||
| * Patched LAPACK to fix bugs 114, 117, and 118. | |||||
| (http://www.netlib.org/lapack/bug_list.html) | |||||
| * Disabled ?gemm3m as a workaround. (#400) | |||||
| x86/x86-64: | |||||
| * Fixed many bugs in the optimized kernels for Sandy Bridge, Haswell, | |||||
| Bulldozer, and Piledriver. | |||||
| https://github.com/xianyi/OpenBLAS/wiki/Fixed-optimized-kernels-To-do-List | |||||
| ARM: | |||||
| * Improved LAPACK testing. | |||||
| ==================================================================== | ==================================================================== | ||||
| Version 0.2.9 | Version 0.2.9 | ||||
| 10-Jun-2014 | 10-Jun-2014 | ||||
| @@ -3,7 +3,7 @@ | |||||
| # | # | ||||
| # This library's version | # This library's version | ||||
| VERSION = 0.2.10.rc2 | |||||
| VERSION = 0.2.10 | |||||
| # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | ||||
| # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | ||||
| @@ -25,9 +25,20 @@ VERSION = 0.2.10.rc2 | |||||
| # FC = gfortran | # FC = gfortran | ||||
| # You can even specify a cross compiler. In that case, please also set HOSTCC. | # You can even specify a cross compiler. In that case, please also set HOSTCC. | ||||
| # cross compiler for Windows | |||||
| # CC = x86_64-w64-mingw32-gcc | # CC = x86_64-w64-mingw32-gcc | ||||
| # FC = x86_64-w64-mingw32-gfortran | # FC = x86_64-w64-mingw32-gfortran | ||||
| # cross compiler for 32bit ARM | |||||
| # CC = arm-linux-gnueabihf-gcc | |||||
| # FC = arm-linux-gnueabihf-gfortran | |||||
| # cross compiler for 64bit ARM | |||||
| # CC = aarch64-linux-gnu-gcc | |||||
| # FC = aarch64-linux-gnu-gfortran | |||||
| # If you use the cross compiler, please set this host compiler. | # If you use the cross compiler, please set this host compiler. | ||||
| # HOSTCC = gcc | # HOSTCC = gcc | ||||
| @@ -88,6 +99,9 @@ NO_AFFINITY = 1 | |||||
| # and OS. However, the performance is low. | # and OS. However, the performance is low. | ||||
| # NO_AVX = 1 | # NO_AVX = 1 | ||||
| # Don't use Haswell optimizations if binutils is too old (e.g. RHEL6) | |||||
| # NO_AVX2 = 1 | |||||
| # Don't use parallel make. | # Don't use parallel make. | ||||
| # NO_PARALLEL_MAKE = 1 | # NO_PARALLEL_MAKE = 1 | ||||
| @@ -109,6 +109,10 @@ ifeq ($(BINARY), 32) | |||||
| GETARCH_FLAGS += -DNO_AVX | GETARCH_FLAGS += -DNO_AVX | ||||
| endif | endif | ||||
| ifeq ($(NO_AVX2), 1) | |||||
| GETARCH_FLAGS += -DNO_AVX2 | |||||
| endif | |||||
| ifeq ($(DEBUG), 1) | ifeq ($(DEBUG), 1) | ||||
| GETARCH_FLAGS += -g | GETARCH_FLAGS += -g | ||||
| endif | endif | ||||
| @@ -385,7 +389,10 @@ endif | |||||
| ifeq ($(ARCH), x86_64) | ifeq ($(ARCH), x86_64) | ||||
| DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | ||||
| ifneq ($(NO_AVX), 1) | ifneq ($(NO_AVX), 1) | ||||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL | |||||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||||
| endif | |||||
| ifneq ($(NO_AVX2), 1) | |||||
| DYNAMIC_CORE += HASWELL | |||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -777,6 +784,10 @@ ifeq ($(BINARY), 32) | |||||
| CCOMMON_OPT += -DNO_AVX | CCOMMON_OPT += -DNO_AVX | ||||
| endif | endif | ||||
| ifeq ($(NO_AVX2), 1) | |||||
| CCOMMON_OPT += -DNO_AVX2 | |||||
| endif | |||||
| ifdef SMP | ifdef SMP | ||||
| CCOMMON_OPT += -DSMP_SERVER | CCOMMON_OPT += -DSMP_SERVER | ||||
| @@ -1,157 +1,607 @@ | |||||
| TOPDIR = .. | TOPDIR = .. | ||||
| include $(TOPDIR)/Makefile.system | include $(TOPDIR)/Makefile.system | ||||
| CULA_INC = -I/usr/local/cula/include | |||||
| CULA_LIB = -L/usr/local/cula/lib64 -Wl,-rpath,/usr/local/cula/lib64 -lcula_fortran -lcula -lcublas | |||||
| all :: dlinpack.goto dlinpack.mkl dlinpack.acml dcholesky.goto dcholesky.mkl dcholesky.acml | |||||
| ./dlinpack.goto 4000 4000 1 | |||||
| -./dlinpack.mkl 4000 4000 1 | |||||
| -./dlinpack.acml 4000 4000 1 | |||||
| ./dcholesky.goto 4000 4000 1 | |||||
| -./dcholesky.mkl 4000 4000 1 | |||||
| -./dcholesky.acml 4000 4000 1 | |||||
| # ACML standard | |||||
| ACML=/opt/acml5.3.1/gfortran64_mp/lib | |||||
| LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm | |||||
| # ACML custom | |||||
| #ACML=/opt/pb/acml-5-3-1-gfortran-64bit/gfortran64_fma4_mp/lib | |||||
| #LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm | |||||
| # Atlas Ubuntu | |||||
| #ATLAS=/usr/lib/atlas-base | |||||
| #LIBATLAS = -fopenmp $(ATLAS)/liblapack_atlas.a $(ATLAS)/libptcblas.a $(ATLAS)/libptf77blas.a $(ATLAS)/libatlas.a -lgfortran -lm | |||||
| # Atlas RHEL and Fedora | |||||
| ATLAS=/usr/lib64/atlas | |||||
| LIBATLAS = -fopenmp $(ATLAS)/liblapack.a $(ATLAS)/libptcblas.a $(ATLAS)/libptf77blas.a $(ATLAS)/libatlas.a -lgfortran -lm | |||||
| # Intel standard | |||||
| MKL=/opt/intel/mkl/lib/intel64 | |||||
| LIBMKL = -L$(MKL) -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm | |||||
| # Intel custom | |||||
| #MKL=/home/saar/intel_mkl | |||||
| #LIBMKL = -L$(MKL) -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm | |||||
| goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \ | |||||
| scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \ | |||||
| sgemm.goto dgemm.goto cgemm.goto zgemm.goto \ | |||||
| strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \ | |||||
| strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \ | |||||
| ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \ | |||||
| ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \ | |||||
| chemm.goto zhemm.goto \ | |||||
| cherk.goto zherk.goto \ | |||||
| cher2k.goto zher2k.goto \ | |||||
| ssymm.goto dsymm.goto csymm.goto zsymm.goto | |||||
| acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \ | |||||
| scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \ | |||||
| sgemm.acml dgemm.acml cgemm.acml zgemm.acml \ | |||||
| strmm.acml dtrmm.acml ctrmm.acml ztrmm.acml \ | |||||
| strsm.acml dtrsm.acml ctrsm.acml ztrsm.acml \ | |||||
| ssyrk.acml dsyrk.acml csyrk.acml zsyrk.acml \ | |||||
| ssyr2k.acml dsyr2k.acml csyr2k.acml zsyr2k.acml \ | |||||
| chemm.acml zhemm.acml \ | |||||
| cherk.acml zherk.acml \ | |||||
| cher2k.acml zher2k.acml \ | |||||
| ssymm.acml dsymm.acml csymm.acml zsymm.acml | |||||
| atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \ | |||||
| scholesky.atlas dcholesky.atlas ccholesky.atlas zcholesky.atlas \ | |||||
| sgemm.atlas dgemm.atlas cgemm.atlas zgemm.atlas \ | |||||
| strmm.atlas dtrmm.atlas ctrmm.atlas ztrmm.atlas \ | |||||
| strsm.atlas dtrsm.atlas ctrsm.atlas ztrsm.atlas \ | |||||
| ssyrk.atlas dsyrk.atlas csyrk.atlas zsyrk.atlas \ | |||||
| ssyr2k.atlas dsyr2k.atlas csyr2k.atlas zsyr2k.atlas \ | |||||
| chemm.atlas zhemm.atlas \ | |||||
| cherk.atlas zherk.atlas \ | |||||
| cher2k.atlas zher2k.atlas \ | |||||
| ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas | |||||
| mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \ | |||||
| scholesky.mkl dcholesky.mkl ccholesky.mkl zcholesky.mkl \ | |||||
| sgemm.mkl dgemm.mkl cgemm.mkl zgemm.mkl \ | |||||
| strmm.mkl dtrmm.mkl ctrmm.mkl ztrmm.mkl \ | |||||
| strsm.mkl dtrsm.mkl ctrsm.mkl ztrsm.mkl \ | |||||
| ssyrk.mkl dsyrk.mkl csyrk.mkl zsyrk.mkl \ | |||||
| ssyr2k.mkl dsyr2k.mkl csyr2k.mkl zsyr2k.mkl \ | |||||
| chemm.mkl zhemm.mkl \ | |||||
| cherk.mkl zherk.mkl \ | |||||
| cher2k.mkl zher2k.mkl \ | |||||
| ssymm.mkl dsymm.mkl csymm.mkl zsymm.mkl | |||||
| all :: goto atlas acml mkl | |||||
| ##################################### Slinpack #################################################### | |||||
| slinpack.goto : slinpack.$(SUFFIX) ../$(LIBNAME) | slinpack.goto : slinpack.$(SUFFIX) ../$(LIBNAME) | ||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | ||||
| slinpack.acml : slinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| slinpack.atlas : slinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| slinpack.mkl : slinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Dlinpack #################################################### | |||||
| dlinpack.goto : dlinpack.$(SUFFIX) ../$(LIBNAME) | dlinpack.goto : dlinpack.$(SUFFIX) ../$(LIBNAME) | ||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | ||||
| qlinpack.goto : qlinpack.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| dlinpack.acml : dlinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| dlinpack.atlas : dlinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| dlinpack.mkl : dlinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Clinpack #################################################### | |||||
| clinpack.goto : clinpack.$(SUFFIX) ../$(LIBNAME) | clinpack.goto : clinpack.$(SUFFIX) ../$(LIBNAME) | ||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | ||||
| clinpack.acml : clinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| clinpack.atlas : clinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| clinpack.mkl : clinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Zlinpack #################################################### | |||||
| zlinpack.goto : zlinpack.$(SUFFIX) ../$(LIBNAME) | zlinpack.goto : zlinpack.$(SUFFIX) ../$(LIBNAME) | ||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | ||||
| xlinpack.goto : xlinpack.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| zlinpack.acml : zlinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| zlinpack.atlas : zlinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| zlinpack.mkl : zlinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Scholesky ################################################### | |||||
| scholesky.goto : scholesky.$(SUFFIX) ../$(LIBNAME) | scholesky.goto : scholesky.$(SUFFIX) ../$(LIBNAME) | ||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | ||||
| scholesky.acml : scholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| scholesky.atlas : scholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| scholesky.mkl : scholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Dcholesky ################################################### | |||||
| dcholesky.goto : dcholesky.$(SUFFIX) ../$(LIBNAME) | dcholesky.goto : dcholesky.$(SUFFIX) ../$(LIBNAME) | ||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | ||||
| qcholesky.goto : qcholesky.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| dcholesky.acml : dcholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| dcholesky.atlas : dcholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| dcholesky.mkl : dcholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Ccholesky ################################################### | |||||
| ccholesky.goto : ccholesky.$(SUFFIX) ../$(LIBNAME) | ccholesky.goto : ccholesky.$(SUFFIX) ../$(LIBNAME) | ||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | ||||
| ccholesky.acml : ccholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ccholesky.atlas : ccholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ccholesky.mkl : ccholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| zcholesky.goto : zcholesky.$(SUFFIX) ../$(LIBNAME) | zcholesky.goto : zcholesky.$(SUFFIX) ../$(LIBNAME) | ||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | ||||
| ##################################### Zcholesky ################################################### | |||||
| xcholesky.goto : xcholesky.$(SUFFIX) ../$(LIBNAME) | xcholesky.goto : xcholesky.$(SUFFIX) ../$(LIBNAME) | ||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | ||||
| slinpack.mkl : slinpack.$(SUFFIX) | |||||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| zcholesky.acml : zcholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| dlinpack.mkl : dlinpack.$(SUFFIX) | |||||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| zcholesky.atlas : zcholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| clinpack.mkl : clinpack.$(SUFFIX) | |||||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| zcholesky.mkl : zcholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| zlinpack.mkl : zlinpack.$(SUFFIX) | |||||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| scholesky.mkl : scholesky.$(SUFFIX) | |||||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Sgemm #################################################### | |||||
| sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| dcholesky.mkl : dcholesky.$(SUFFIX) | |||||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| sgemm.acml : sgemm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ccholesky.mkl : ccholesky.$(SUFFIX) | |||||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| sgemm.atlas : sgemm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| zcholesky.mkl : zcholesky.$(SUFFIX) | |||||
| -$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| sgemm.mkl : sgemm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| slinpack.acml : slinpack.$(SUFFIX) | |||||
| ##################################### Dgemm #################################################### | |||||
| dgemm.goto : dgemm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| dgemm.acml : dgemm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
| dlinpack.acml : dlinpack.$(SUFFIX) | |||||
| dgemm.atlas : dgemm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| dgemm.mkl : dgemm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Cgemm #################################################### | |||||
| cgemm.goto : cgemm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| cgemm.acml : cgemm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
| clinpack.acml : clinpack.$(SUFFIX) | |||||
| cgemm.atlas : cgemm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| cgemm.mkl : cgemm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Zgemm #################################################### | |||||
| zgemm.goto : zgemm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| zgemm.acml : zgemm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
| zlinpack.acml : zlinpack.$(SUFFIX) | |||||
| zgemm.atlas : zgemm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| zgemm.mkl : zgemm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Ssymm #################################################### | |||||
| ssymm.goto : ssymm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| ssymm.acml : ssymm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
| scholesky.acml : scholesky.$(SUFFIX) | |||||
| ssymm.atlas : ssymm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ssymm.mkl : ssymm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Dsymm #################################################### | |||||
| dsymm.goto : dsymm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| dsymm.acml : dsymm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
| dcholesky.acml : dcholesky.$(SUFFIX) | |||||
| dsymm.atlas : dsymm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| dsymm.mkl : dsymm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Csymm #################################################### | |||||
| csymm.goto : csymm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| csymm.acml : csymm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
| ccholesky.acml : ccholesky.$(SUFFIX) | |||||
| csymm.atlas : csymm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| csymm.mkl : csymm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Zsymm #################################################### | |||||
| zsymm.goto : zsymm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| zsymm.acml : zsymm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
| zcholesky.acml : zcholesky.$(SUFFIX) | |||||
| zsymm.atlas : zsymm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| zsymm.mkl : zsymm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Strmm #################################################### | |||||
| strmm.goto : strmm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| strmm.acml : strmm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| strmm.atlas : strmm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| strmm.mkl : strmm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Dtrmm #################################################### | |||||
| dtrmm.goto : dtrmm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| dtrmm.acml : dtrmm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| dtrmm.atlas : dtrmm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| dtrmm.mkl : dtrmm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Ctrmm #################################################### | |||||
| ctrmm.goto : ctrmm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| ctrmm.acml : ctrmm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
| slinpack.flame : slinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ctrmm.atlas : ctrmm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| dlinpack.flame : dlinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ctrmm.mkl : ctrmm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| clinpack.flame : clinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Ztrmm #################################################### | |||||
| zlinpack.flame : zlinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ztrmm.goto : ztrmm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| scholesky.flame : scholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ztrmm.acml : ztrmm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| dcholesky.flame : dcholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ztrmm.atlas : ztrmm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ccholesky.flame : ccholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ztrmm.mkl : ztrmm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| zcholesky.flame : zcholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| slinpack.sun : slinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Strsm #################################################### | |||||
| strsm.goto : strsm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| dlinpack.sun : dlinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| strsm.acml : strsm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| clinpack.sun : clinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| strsm.atlas : strsm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| zlinpack.sun : zlinpack.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| strsm.mkl : strsm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| scholesky.sun : scholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Dtrsm #################################################### | |||||
| dtrsm.goto : dtrsm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| dcholesky.sun : dcholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| dtrsm.acml : dtrsm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ccholesky.sun : ccholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| dtrsm.atlas : dtrsm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| zcholesky.sun : zcholesky.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| dtrsm.mkl : dtrsm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| slinpack.cula : slinpack.$(SUFFIX) cula_wrapper.$(SUFFIX) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CULA_LIB) ../$(LIBNAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Ctrsm #################################################### | |||||
| clinpack.cula : clinpack.$(SUFFIX) cula_wrapper.$(SUFFIX) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CULA_LIB) ../$(LIBNAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ctrsm.goto : ctrsm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| ctrsm.acml : ctrsm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| cula_wrapper.$(SUFFIX) : cula_wrapper.c | |||||
| $(CC) $(CFLAGS) -c $(CULA_INC) -o $(@F) $^ | |||||
| ctrsm.atlas : ctrsm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ctrsm.mkl : ctrsm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Ztrsm #################################################### | |||||
| ztrsm.goto : ztrsm.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| ztrsm.acml : ztrsm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ztrsm.atlas : ztrsm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ztrsm.mkl : ztrsm.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ##################################### Ssyrk #################################################### | |||||
| ssyrk.goto : ssyrk.$(SUFFIX) ../$(LIBNAME) | |||||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
| ssyrk.acml : ssyrk.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ssyrk.atlas : ssyrk.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
| ssyrk.mkl : ssyrk.$(SUFFIX) | |||||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Level-3 link rules ##########################################
# Benchmark executables for dsyrk/csyrk/zsyrk, {s,d,c,z}syr2k, {c,z}hemm, {c,z}herk and
# {c,z}her2k.  Each driver object <name>.$(SUFFIX) is linked once per BLAS implementation:
#
#   <name>.goto    against ../$(LIBNAME)
#   <name>.acml    against $(LIBACML)
#   <name>.atlas   against $(LIBATLAS)
#   <name>.mkl     against $(LIBMKL)
#
# Static pattern rules keep one recipe per flavour while still giving every listed target
# an explicit rule, so targets outside the list are not affected.
level3_tail_benches := dsyrk csyrk zsyrk \
                       ssyr2k dsyr2k csyr2k zsyr2k \
                       chemm zhemm \
                       cherk zherk \
                       cher2k zher2k

$(addsuffix .goto,$(level3_tail_benches)) : %.goto : %.$(SUFFIX) ../$(LIBNAME)
	$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

# The leading '-' on the three recipes below makes make ignore a failed link and carry on.
$(addsuffix .acml,$(level3_tail_benches)) : %.acml : %.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

$(addsuffix .atlas,$(level3_tail_benches)) : %.atlas : %.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

$(addsuffix .mkl,$(level3_tail_benches)) : %.mkl : %.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

###################################################################################################

# Driver object for the s-prefixed LINPACK benchmark: linpack.c compiled with both
# COMPLEX and DOUBLE explicitly undefined.
slinpack.$(SUFFIX) : s%.$(SUFFIX) : %.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^

| @@ -159,37 +609,119 @@ slinpack.$(SUFFIX) : linpack.c | |||||
# Driver objects.  Each benchmark source is compiled once per precision.  The precision is
# chosen only through the COMPLEX / DOUBLE / XDOUBLE preprocessor switches, and the first
# letter of the object name (s, d, q, c, z, x) records which combination was used.
#
# bench_objs(prefix, sources) -> "<prefix><source>.$(SUFFIX)" for every listed source.
bench_objs = $(addsuffix .$(SUFFIX),$(addprefix $(1),$(2)))

real_l3_srcs    := gemm symm trmm trsm syrk syr2k
complex_l3_srcs := $(real_l3_srcs) hemm herk her2k

# s : -UCOMPLEX -UDOUBLE   (slinpack.$(SUFFIX) has its own rule)
$(call bench_objs,s,cholesky $(real_l3_srcs)) : s%.$(SUFFIX) : %.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^

# d : -UCOMPLEX -DDOUBLE
$(call bench_objs,d,linpack cholesky $(real_l3_srcs)) : d%.$(SUFFIX) : %.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^

# q : -UCOMPLEX -DXDOUBLE
$(call bench_objs,q,linpack cholesky) : q%.$(SUFFIX) : %.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -DXDOUBLE -o $(@F) $^

# c : -DCOMPLEX -UDOUBLE
$(call bench_objs,c,linpack cholesky $(complex_l3_srcs)) : c%.$(SUFFIX) : %.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^

# z : -DCOMPLEX -DDOUBLE
$(call bench_objs,z,linpack cholesky $(complex_l3_srcs)) : z%.$(SUFFIX) : %.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^

# x : -DCOMPLEX -DXDOUBLE
$(call bench_objs,x,linpack cholesky) : x%.$(SUFFIX) : %.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DXDOUBLE -o $(@F) $^

# Remove the linked benchmark executables of every flavour in this directory.
clean ::
	@rm -f *.goto *.mkl *.acml *.sun *.cula *.atlas

# Shared trailing rules for the whole source tree.
include $(TOPDIR)/Makefile.tail
| @@ -0,0 +1,210 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2014, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
/* Bind GEMM to the BLAS entry point matching the COMPLEX / DOUBLE build switches. */
#undef GEMM

#if defined(COMPLEX) && defined(DOUBLE)
#define GEMM   BLASFUNC(zgemm)
#elif defined(COMPLEX)
#define GEMM   BLASFUNC(cgemm)
#elif defined(DOUBLE)
#define GEMM   BLASFUNC(dgemm)
#else
#define GEMM   BLASFUNC(sgemm)
#endif

#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted to rebase a FILETIME value onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives the current time as seconds + microseconds since the Unix epoch;
 *      nothing is written when tv is NULL.
 * tz : ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Reassemble the 64-bit tick count from its two 32-bit halves. */
  ticks  = (unsigned __int64)ft.dwHighDateTime << 32;
  ticks |= ft.dwLowDateTime;

  ticks /= 10;                        /* convert into microseconds */
  ticks -= DELTA_EPOCH_IN_MICROSECS;  /* file time -> unix epoch   */

  tv->tv_sec  = (long)(ticks / 1000000UL);
  tv->tv_usec = (long)(ticks % 1000000UL);

  return 0;
}

#endif

/* The trailing "&& 0" keeps this allocator compiled out. */
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * Obtain `size` bytes from a System V shared-memory segment requested with SHM_HUGETLB.
 * The requested length is (size + HUGE_PAGESIZE) masked down to a HUGE_PAGESIZE multiple.
 * If shmget or shmat fails, a message is printed and the process exits with status 1.
 * Returns the address at which the segment was attached.
 */
static void *huge_malloc(BLASLONG size){

  int   segment;
  void *base;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  base = shmat(segment, NULL, SHM_RND);

  if ((BLASLONG)base == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Flag the segment for removal right away; the attached mapping stays usable. */
  shmctl(segment, IPC_RMID, 0);

  return base;
}

/* Route every malloc() call below through the huge-page allocator. */
#define malloc huge_malloc

#endif

| int MAIN__(int argc, char *argv[]){ | |||||
| FLOAT *a, *b, *c; | |||||
| FLOAT alpha[] = {1.0, 1.0}; | |||||
| FLOAT beta [] = {1.0, 1.0}; | |||||
| char trans='N'; | |||||
| blasint m, i, j; | |||||
| int loops = 1; | |||||
| int l; | |||||
| char *p; | |||||
| int from = 1; | |||||
| int to = 200; | |||||
| int step = 1; | |||||
| struct timeval start, stop; | |||||
| double time1,timeg; | |||||
| argc--;argv++; | |||||
| if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
| if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
| if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
| fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step); | |||||
| if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
| } | |||||
| if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
| } | |||||
| if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
| } | |||||
| p = getenv("OPENBLAS_LOOPS"); | |||||
| if ( p != NULL ) | |||||
| loops = atoi(p); | |||||
| #ifdef linux | |||||
| srandom(getpid()); | |||||
| #endif | |||||
| fprintf(stderr, " SIZE Flops\n"); | |||||
| for(m = from; m <= to; m += step) | |||||
| { | |||||
| timeg=0; | |||||
| fprintf(stderr, " %6d : ", (int)m); | |||||
| for (l=0; l<loops; l++) | |||||
| { | |||||
| for(j = 0; j < m; j++){ | |||||
| for(i = 0; i < m * COMPSIZE; i++){ | |||||
| a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| } | |||||
| } | |||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | |||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| timeg += time1; | |||||
| } | |||||
| timeg /= loops; | |||||
| fprintf(stderr, | |||||
| " %10.2f MFlops\n", | |||||
| COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||||
| @@ -0,0 +1,192 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2014, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
/* Bind HEMM to chemm, or to zhemm when DOUBLE is defined. */
#undef HEMM

#ifndef DOUBLE
#define HEMM   BLASFUNC(chemm)
#else
#define HEMM   BLASFUNC(zhemm)
#endif

#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted to rebase a FILETIME value onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives the current time as seconds + microseconds since the Unix epoch;
 *      nothing is written when tv is NULL.
 * tz : ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Reassemble the 64-bit tick count from its two 32-bit halves. */
  ticks  = (unsigned __int64)ft.dwHighDateTime << 32;
  ticks |= ft.dwLowDateTime;

  ticks /= 10;                        /* convert into microseconds */
  ticks -= DELTA_EPOCH_IN_MICROSECS;  /* file time -> unix epoch   */

  tv->tv_sec  = (long)(ticks / 1000000UL);
  tv->tv_usec = (long)(ticks % 1000000UL);

  return 0;
}

#endif

/* The trailing "&& 0" keeps this allocator compiled out. */
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * Obtain `size` bytes from a System V shared-memory segment requested with SHM_HUGETLB.
 * The requested length is (size + HUGE_PAGESIZE) masked down to a HUGE_PAGESIZE multiple.
 * If shmget or shmat fails, a message is printed and the process exits with status 1.
 * Returns the address at which the segment was attached.
 */
static void *huge_malloc(BLASLONG size){

  int   segment;
  void *base;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  base = shmat(segment, NULL, SHM_RND);

  if ((BLASLONG)base == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Flag the segment for removal right away; the attached mapping stays usable. */
  shmctl(segment, IPC_RMID, 0);

  return base;
}

/* Route every malloc() call below through the huge-page allocator. */
#define malloc huge_malloc

#endif

| int MAIN__(int argc, char *argv[]){ | |||||
| FLOAT *a, *b, *c; | |||||
| FLOAT alpha[] = {1.0, 1.0}; | |||||
| FLOAT beta [] = {1.0, 1.0}; | |||||
| char *p; | |||||
| char side='L'; | |||||
| char uplo='U'; | |||||
| if ((p = getenv("OPENBLAS_SIDE"))) side=*p; | |||||
| if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||||
| blasint m, i, j; | |||||
| int from = 1; | |||||
| int to = 200; | |||||
| int step = 1; | |||||
| struct timeval start, stop; | |||||
| double time1; | |||||
| argc--;argv++; | |||||
| if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
| if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
| if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
| fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c\n", from, to, step,side,uplo); | |||||
| if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
| } | |||||
| if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
| } | |||||
| if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
| } | |||||
| #ifdef linux | |||||
| srandom(getpid()); | |||||
| #endif | |||||
| fprintf(stderr, " SIZE Flops\n"); | |||||
| for(m = from; m <= to; m += step) | |||||
| { | |||||
| fprintf(stderr, " %6d : ", (int)m); | |||||
| for(j = 0; j < m; j++){ | |||||
| for(i = 0; i < m * COMPSIZE; i++){ | |||||
| a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| } | |||||
| } | |||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | |||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| fprintf(stderr, | |||||
| " %10.2f MFlops\n", | |||||
| COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||||
| @@ -0,0 +1,191 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2014, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
/* Bind HER2K to cher2k, or to zher2k when DOUBLE is defined. */
#undef HER2K

#ifndef DOUBLE
#define HER2K   BLASFUNC(cher2k)
#else
#define HER2K   BLASFUNC(zher2k)
#endif

#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted to rebase a FILETIME value onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives the current time as seconds + microseconds since the Unix epoch;
 *      nothing is written when tv is NULL.
 * tz : ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 ticks;

  (void)tz;

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Reassemble the 64-bit tick count from its two 32-bit halves. */
  ticks  = (unsigned __int64)ft.dwHighDateTime << 32;
  ticks |= ft.dwLowDateTime;

  ticks /= 10;                        /* convert into microseconds */
  ticks -= DELTA_EPOCH_IN_MICROSECS;  /* file time -> unix epoch   */

  tv->tv_sec  = (long)(ticks / 1000000UL);
  tv->tv_usec = (long)(ticks % 1000000UL);

  return 0;
}

#endif

/* The trailing "&& 0" keeps this allocator compiled out. */
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * Obtain `size` bytes from a System V shared-memory segment requested with SHM_HUGETLB.
 * The requested length is (size + HUGE_PAGESIZE) masked down to a HUGE_PAGESIZE multiple.
 * If shmget or shmat fails, a message is printed and the process exits with status 1.
 * Returns the address at which the segment was attached.
 */
static void *huge_malloc(BLASLONG size){

  int   segment;
  void *base;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  base = shmat(segment, NULL, SHM_RND);

  if ((BLASLONG)base == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Flag the segment for removal right away; the attached mapping stays usable. */
  shmctl(segment, IPC_RMID, 0);

  return base;
}

/* Route every malloc() call below through the huge-page allocator. */
#define malloc huge_malloc

#endif

| int MAIN__(int argc, char *argv[]){ | |||||
| FLOAT *a, *b, *c; | |||||
| FLOAT alpha[] = {1.0, 1.0}; | |||||
| FLOAT beta [] = {1.0, 1.0}; | |||||
| char *p; | |||||
| char uplo='U'; | |||||
| char trans='N'; | |||||
| if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||||
| if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||||
| blasint m, i, j; | |||||
| int from = 1; | |||||
| int to = 200; | |||||
| int step = 1; | |||||
| struct timeval start, stop; | |||||
| double time1; | |||||
| argc--;argv++; | |||||
| if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
| if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
| if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
| fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans); | |||||
| if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
| } | |||||
| if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
| } | |||||
| if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
| } | |||||
| #ifdef linux | |||||
| srandom(getpid()); | |||||
| #endif | |||||
| fprintf(stderr, " SIZE Flops\n"); | |||||
| for(m = from; m <= to; m += step) | |||||
| { | |||||
| fprintf(stderr, " %6d : ", (int)m); | |||||
| for(j = 0; j < m; j++){ | |||||
| for(i = 0; i < m * COMPSIZE; i++){ | |||||
| a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| } | |||||
| } | |||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | |||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| fprintf(stderr, | |||||
| " %10.2f MFlops\n", | |||||
| COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||||
| @@ -0,0 +1,189 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2014, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #undef HERK | |||||
| #ifdef DOUBLE | |||||
| #define HERK BLASFUNC(zherk) | |||||
| #else | |||||
| #define HERK BLASFUNC(cherk) | |||||
| #endif | |||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| /* | |||||
|  * gettimeofday() substitute, compiled only when __WIN32__ or __WIN64__ | |||||
|  * is defined. | |||||
|  * | |||||
|  * tv : receives the current time as seconds and microseconds since the | |||||
|  *      Unix epoch; left untouched when NULL. | |||||
|  * tz : ignored; present only to mirror the POSIX signature. | |||||
|  * | |||||
|  * Always returns 0. | |||||
|  */ | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
|     FILETIME ft; | |||||
|     unsigned __int64 tmpres; | |||||
|     (void)tz; /* no time zone data is ever produced */ | |||||
|     if (NULL == tv) return 0; | |||||
|     GetSystemTimeAsFileTime(&ft); | |||||
|     /* Join the two 32-bit halves into a single 64-bit tick count. */ | |||||
|     tmpres = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime; | |||||
|     /*converting file time to unix epoch*/ | |||||
|     tmpres /= 10; /*convert into microseconds*/ | |||||
|     tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
|     tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
|     tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
|     return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| /* | |||||
|  * Allocate a buffer from a SysV shared-memory segment created with | |||||
|  * SHM_HUGETLB. The requested size has HUGE_PAGESIZE added and is then | |||||
|  * masked with ~(HUGE_PAGESIZE - 1). IPC_RMID is issued on the segment | |||||
|  * right after it is attached. Prints a message and exits with status 1 | |||||
|  * when shmget() or shmat() fails; otherwise returns the attached address. | |||||
|  */ | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
|     void *mapping; | |||||
|     int segment = shmget(IPC_PRIVATE, | |||||
|                          (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
|                          SHM_HUGETLB | IPC_CREAT |0600); | |||||
|     if (segment < 0) { | |||||
|         printf( "Memory allocation failed(shmget).\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     mapping = shmat(segment, NULL, SHM_RND); | |||||
|     if ((BLASLONG)mapping == -1){ | |||||
|         printf( "Memory allocation failed(shmat).\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     shmctl(segment, IPC_RMID, 0); | |||||
|     return mapping; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| /* | |||||
|  * Benchmark driver for HERK (cherk, or zherk when DOUBLE is defined). | |||||
|  * | |||||
|  * Command line: [from [to [step]]] - the square problem sizes to time; | |||||
|  * defaults are 1, 200 and 1, and `to` is raised to `from` when smaller. | |||||
|  * Environment: the first character of OPENBLAS_UPLO / OPENBLAS_TRANS | |||||
|  * replaces the default uplo ('U') / trans ('N') argument. | |||||
|  * | |||||
|  * For every size one line with the MFlops figure of a single timed call | |||||
|  * is written to stderr. Returns 0; exits with status 1 when the step is | |||||
|  * not positive or a buffer cannot be allocated. | |||||
|  */ | |||||
| int MAIN__(int argc, char *argv[]){ | |||||
|     FLOAT *a, *c; | |||||
|     FLOAT alpha[] = {1.0, 1.0}; | |||||
|     FLOAT beta [] = {1.0, 1.0}; | |||||
|     char *p; | |||||
|     char uplo='U'; | |||||
|     char trans='N'; | |||||
|     if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||||
|     if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||||
|     blasint m, i, j; | |||||
|     int from = 1; | |||||
|     int to = 200; | |||||
|     int step = 1; | |||||
|     struct timeval start, stop; | |||||
|     double time1; | |||||
|     argc--;argv++; | |||||
|     if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
|     if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
|     if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
|     /* A zero or negative step would never carry m past `to`. */ | |||||
|     if (step < 1) { | |||||
|         fprintf(stderr, "Step must be a positive integer.\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans); | |||||
|     /* Both buffers are sized once for the largest problem. */ | |||||
|     if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
|         fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
|     } | |||||
|     if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
|         fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
|     } | |||||
| #ifdef linux | |||||
|     srandom(getpid()); | |||||
| #endif | |||||
|     fprintf(stderr, " SIZE Flops\n"); | |||||
|     for(m = from; m <= to; m += step) | |||||
|     { | |||||
|         fprintf(stderr, " %6d : ", (int)m); | |||||
|         /* Refill the leading m x m part of each operand with values in [-0.5, 0.5]. */ | |||||
|         for(j = 0; j < m; j++){ | |||||
|             for(i = 0; i < m * COMPSIZE; i++){ | |||||
|                 a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
|                 c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
|             } | |||||
|         } | |||||
|         /* Only the library call itself sits between the two time stamps. */ | |||||
|         gettimeofday( &start, (struct timezone *)0); | |||||
|         HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m ); | |||||
|         gettimeofday( &stop, (struct timezone *)0); | |||||
|         time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
|         fprintf(stderr, | |||||
|                 " %10.2f MFlops\n", | |||||
|                 COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||||
|     } | |||||
|     return 0; | |||||
| } | |||||
| /* Weak alias: main resolves to MAIN__ unless another definition of main is linked in. Declared int to match MAIN__. */ | |||||
| int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||||
| @@ -0,0 +1,203 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2014, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #undef SYMM | |||||
| #ifndef COMPLEX | |||||
| #ifdef DOUBLE | |||||
| #define SYMM BLASFUNC(dsymm) | |||||
| #else | |||||
| #define SYMM BLASFUNC(ssymm) | |||||
| #endif | |||||
| #else | |||||
| #ifdef DOUBLE | |||||
| #define SYMM BLASFUNC(zsymm) | |||||
| #else | |||||
| #define SYMM BLASFUNC(csymm) | |||||
| #endif | |||||
| #endif | |||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| /* | |||||
|  * gettimeofday() substitute, compiled only when __WIN32__ or __WIN64__ | |||||
|  * is defined. | |||||
|  * | |||||
|  * tv : receives the current time as seconds and microseconds since the | |||||
|  *      Unix epoch; left untouched when NULL. | |||||
|  * tz : ignored; present only to mirror the POSIX signature. | |||||
|  * | |||||
|  * Always returns 0. | |||||
|  */ | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
|     FILETIME ft; | |||||
|     unsigned __int64 tmpres; | |||||
|     (void)tz; /* no time zone data is ever produced */ | |||||
|     if (NULL == tv) return 0; | |||||
|     GetSystemTimeAsFileTime(&ft); | |||||
|     /* Join the two 32-bit halves into a single 64-bit tick count. */ | |||||
|     tmpres = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime; | |||||
|     /*converting file time to unix epoch*/ | |||||
|     tmpres /= 10; /*convert into microseconds*/ | |||||
|     tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
|     tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
|     tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
|     return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| /* | |||||
|  * Allocate a buffer from a SysV shared-memory segment created with | |||||
|  * SHM_HUGETLB. The requested size has HUGE_PAGESIZE added and is then | |||||
|  * masked with ~(HUGE_PAGESIZE - 1). IPC_RMID is issued on the segment | |||||
|  * right after it is attached. Prints a message and exits with status 1 | |||||
|  * when shmget() or shmat() fails; otherwise returns the attached address. | |||||
|  */ | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
|     void *mapping; | |||||
|     int segment = shmget(IPC_PRIVATE, | |||||
|                          (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
|                          SHM_HUGETLB | IPC_CREAT |0600); | |||||
|     if (segment < 0) { | |||||
|         printf( "Memory allocation failed(shmget).\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     mapping = shmat(segment, NULL, SHM_RND); | |||||
|     if ((BLASLONG)mapping == -1){ | |||||
|         printf( "Memory allocation failed(shmat).\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     shmctl(segment, IPC_RMID, 0); | |||||
|     return mapping; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| /* | |||||
|  * Benchmark driver for SYMM (ssymm/dsymm/csymm/zsymm, selected by the | |||||
|  * COMPLEX and DOUBLE macros). | |||||
|  * | |||||
|  * Command line: [from [to [step]]] - the square problem sizes to time; | |||||
|  * defaults are 1, 200 and 1, and `to` is raised to `from` when smaller. | |||||
|  * Environment: the first character of OPENBLAS_SIDE / OPENBLAS_UPLO | |||||
|  * replaces the default side ('L') / uplo ('U') argument. | |||||
|  * | |||||
|  * For every size one line with the MFlops figure of a single timed call | |||||
|  * is written to stderr. Returns 0; exits with status 1 when the step is | |||||
|  * not positive or a buffer cannot be allocated. | |||||
|  */ | |||||
| int MAIN__(int argc, char *argv[]){ | |||||
|     FLOAT *a, *b, *c; | |||||
|     FLOAT alpha[] = {1.0, 1.0}; | |||||
|     FLOAT beta [] = {1.0, 1.0}; | |||||
|     char *p; | |||||
|     char side='L'; | |||||
|     char uplo='U'; | |||||
|     if ((p = getenv("OPENBLAS_SIDE"))) side=*p; | |||||
|     if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||||
|     blasint m, i, j; | |||||
|     int from = 1; | |||||
|     int to = 200; | |||||
|     int step = 1; | |||||
|     struct timeval start, stop; | |||||
|     double time1; | |||||
|     argc--;argv++; | |||||
|     if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
|     if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
|     if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
|     /* A zero or negative step would never carry m past `to`. */ | |||||
|     if (step < 1) { | |||||
|         fprintf(stderr, "Step must be a positive integer.\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c\n", from, to, step,side,uplo); | |||||
|     /* All three buffers are sized once for the largest problem. */ | |||||
|     if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
|         fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
|     } | |||||
|     if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
|         fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
|     } | |||||
|     if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
|         fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
|     } | |||||
| #ifdef linux | |||||
|     srandom(getpid()); | |||||
| #endif | |||||
|     fprintf(stderr, " SIZE Flops\n"); | |||||
|     for(m = from; m <= to; m += step) | |||||
|     { | |||||
|         fprintf(stderr, " %6d : ", (int)m); | |||||
|         /* Refill the leading m x m part of each operand with values in [-0.5, 0.5]. */ | |||||
|         for(j = 0; j < m; j++){ | |||||
|             for(i = 0; i < m * COMPSIZE; i++){ | |||||
|                 a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
|                 b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
|                 c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
|             } | |||||
|         } | |||||
|         /* Only the library call itself sits between the two time stamps. */ | |||||
|         gettimeofday( &start, (struct timezone *)0); | |||||
|         SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | |||||
|         gettimeofday( &stop, (struct timezone *)0); | |||||
|         time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
|         fprintf(stderr, | |||||
|                 " %10.2f MFlops\n", | |||||
|                 COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||||
|     } | |||||
|     return 0; | |||||
| } | |||||
| /* Weak alias: main resolves to MAIN__ unless another definition of main is linked in. Declared int to match MAIN__. */ | |||||
| int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||||
| @@ -0,0 +1,203 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2014, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #undef SYR2K | |||||
| #ifndef COMPLEX | |||||
| #ifdef DOUBLE | |||||
| #define SYR2K BLASFUNC(dsyr2k) | |||||
| #else | |||||
| #define SYR2K BLASFUNC(ssyr2k) | |||||
| #endif | |||||
| #else | |||||
| #ifdef DOUBLE | |||||
| #define SYR2K BLASFUNC(zsyr2k) | |||||
| #else | |||||
| #define SYR2K BLASFUNC(csyr2k) | |||||
| #endif | |||||
| #endif | |||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| /* | |||||
|  * gettimeofday() substitute, compiled only when __WIN32__ or __WIN64__ | |||||
|  * is defined. | |||||
|  * | |||||
|  * tv : receives the current time as seconds and microseconds since the | |||||
|  *      Unix epoch; left untouched when NULL. | |||||
|  * tz : ignored; present only to mirror the POSIX signature. | |||||
|  * | |||||
|  * Always returns 0. | |||||
|  */ | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
|     FILETIME ft; | |||||
|     unsigned __int64 tmpres; | |||||
|     (void)tz; /* no time zone data is ever produced */ | |||||
|     if (NULL == tv) return 0; | |||||
|     GetSystemTimeAsFileTime(&ft); | |||||
|     /* Join the two 32-bit halves into a single 64-bit tick count. */ | |||||
|     tmpres = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime; | |||||
|     /*converting file time to unix epoch*/ | |||||
|     tmpres /= 10; /*convert into microseconds*/ | |||||
|     tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
|     tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
|     tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
|     return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| /* | |||||
|  * Allocate a buffer from a SysV shared-memory segment created with | |||||
|  * SHM_HUGETLB. The requested size has HUGE_PAGESIZE added and is then | |||||
|  * masked with ~(HUGE_PAGESIZE - 1). IPC_RMID is issued on the segment | |||||
|  * right after it is attached. Prints a message and exits with status 1 | |||||
|  * when shmget() or shmat() fails; otherwise returns the attached address. | |||||
|  */ | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
|     void *mapping; | |||||
|     int segment = shmget(IPC_PRIVATE, | |||||
|                          (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
|                          SHM_HUGETLB | IPC_CREAT |0600); | |||||
|     if (segment < 0) { | |||||
|         printf( "Memory allocation failed(shmget).\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     mapping = shmat(segment, NULL, SHM_RND); | |||||
|     if ((BLASLONG)mapping == -1){ | |||||
|         printf( "Memory allocation failed(shmat).\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     shmctl(segment, IPC_RMID, 0); | |||||
|     return mapping; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| /* | |||||
|  * Benchmark driver for SYR2K (ssyr2k/dsyr2k/csyr2k/zsyr2k, selected by | |||||
|  * the COMPLEX and DOUBLE macros). | |||||
|  * | |||||
|  * Command line: [from [to [step]]] - the square problem sizes to time; | |||||
|  * defaults are 1, 200 and 1, and `to` is raised to `from` when smaller. | |||||
|  * Environment: the first character of OPENBLAS_UPLO / OPENBLAS_TRANS | |||||
|  * replaces the default uplo ('U') / trans ('N') argument. | |||||
|  * | |||||
|  * For every size one line with the MFlops figure of a single timed call | |||||
|  * is written to stderr. Returns 0; exits with status 1 when the step is | |||||
|  * not positive or a buffer cannot be allocated. | |||||
|  */ | |||||
| int MAIN__(int argc, char *argv[]){ | |||||
|     FLOAT *a, *b, *c; | |||||
|     FLOAT alpha[] = {1.0, 1.0}; | |||||
|     FLOAT beta [] = {1.0, 1.0}; | |||||
|     char *p; | |||||
|     char uplo='U'; | |||||
|     char trans='N'; | |||||
|     if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||||
|     if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||||
|     blasint m, i, j; | |||||
|     int from = 1; | |||||
|     int to = 200; | |||||
|     int step = 1; | |||||
|     struct timeval start, stop; | |||||
|     double time1; | |||||
|     argc--;argv++; | |||||
|     if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
|     if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
|     if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
|     /* A zero or negative step would never carry m past `to`. */ | |||||
|     if (step < 1) { | |||||
|         fprintf(stderr, "Step must be a positive integer.\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans); | |||||
|     /* All three buffers are sized once for the largest problem. */ | |||||
|     if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
|         fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
|     } | |||||
|     if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
|         fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
|     } | |||||
|     if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
|         fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
|     } | |||||
| #ifdef linux | |||||
|     srandom(getpid()); | |||||
| #endif | |||||
|     fprintf(stderr, " SIZE Flops\n"); | |||||
|     for(m = from; m <= to; m += step) | |||||
|     { | |||||
|         fprintf(stderr, " %6d : ", (int)m); | |||||
|         /* Refill the leading m x m part of each operand with values in [-0.5, 0.5]. */ | |||||
|         for(j = 0; j < m; j++){ | |||||
|             for(i = 0; i < m * COMPSIZE; i++){ | |||||
|                 a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
|                 b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
|                 c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
|             } | |||||
|         } | |||||
|         /* Only the library call itself sits between the two time stamps. */ | |||||
|         gettimeofday( &start, (struct timezone *)0); | |||||
|         SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | |||||
|         gettimeofday( &stop, (struct timezone *)0); | |||||
|         time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
|         fprintf(stderr, | |||||
|                 " %10.2f MFlops\n", | |||||
|                 COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||||
|     } | |||||
|     return 0; | |||||
| } | |||||
| /* Weak alias: main resolves to MAIN__ unless another definition of main is linked in. Declared int to match MAIN__. */ | |||||
| int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||||
| @@ -0,0 +1,199 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2014, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #undef SYRK | |||||
| #ifndef COMPLEX | |||||
| #ifdef DOUBLE | |||||
| #define SYRK BLASFUNC(dsyrk) | |||||
| #else | |||||
| #define SYRK BLASFUNC(ssyrk) | |||||
| #endif | |||||
| #else | |||||
| #ifdef DOUBLE | |||||
| #define SYRK BLASFUNC(zsyrk) | |||||
| #else | |||||
| #define SYRK BLASFUNC(csyrk) | |||||
| #endif | |||||
| #endif | |||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| /* | |||||
|  * gettimeofday() substitute, compiled only when __WIN32__ or __WIN64__ | |||||
|  * is defined. | |||||
|  * | |||||
|  * tv : receives the current time as seconds and microseconds since the | |||||
|  *      Unix epoch; left untouched when NULL. | |||||
|  * tz : ignored; present only to mirror the POSIX signature. | |||||
|  * | |||||
|  * Always returns 0. | |||||
|  */ | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
|     FILETIME ft; | |||||
|     unsigned __int64 tmpres; | |||||
|     (void)tz; /* no time zone data is ever produced */ | |||||
|     if (NULL == tv) return 0; | |||||
|     GetSystemTimeAsFileTime(&ft); | |||||
|     /* Join the two 32-bit halves into a single 64-bit tick count. */ | |||||
|     tmpres = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime; | |||||
|     /*converting file time to unix epoch*/ | |||||
|     tmpres /= 10; /*convert into microseconds*/ | |||||
|     tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
|     tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
|     tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
|     return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| /* | |||||
|  * Allocate a buffer from a SysV shared-memory segment created with | |||||
|  * SHM_HUGETLB. The requested size has HUGE_PAGESIZE added and is then | |||||
|  * masked with ~(HUGE_PAGESIZE - 1). IPC_RMID is issued on the segment | |||||
|  * right after it is attached. Prints a message and exits with status 1 | |||||
|  * when shmget() or shmat() fails; otherwise returns the attached address. | |||||
|  */ | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
|     void *mapping; | |||||
|     int segment = shmget(IPC_PRIVATE, | |||||
|                          (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
|                          SHM_HUGETLB | IPC_CREAT |0600); | |||||
|     if (segment < 0) { | |||||
|         printf( "Memory allocation failed(shmget).\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     mapping = shmat(segment, NULL, SHM_RND); | |||||
|     if ((BLASLONG)mapping == -1){ | |||||
|         printf( "Memory allocation failed(shmat).\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     shmctl(segment, IPC_RMID, 0); | |||||
|     return mapping; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| /* | |||||
|  * Benchmark driver for SYRK (ssyrk/dsyrk/csyrk/zsyrk, selected by the | |||||
|  * COMPLEX and DOUBLE macros). | |||||
|  * | |||||
|  * Command line: [from [to [step]]] - the square problem sizes to time; | |||||
|  * defaults are 1, 200 and 1, and `to` is raised to `from` when smaller. | |||||
|  * Environment: the first character of OPENBLAS_UPLO / OPENBLAS_TRANS | |||||
|  * replaces the default uplo ('U') / trans ('N') argument. | |||||
|  * | |||||
|  * For every size one line with the MFlops figure of a single timed call | |||||
|  * is written to stderr. Returns 0; exits with status 1 when the step is | |||||
|  * not positive or a buffer cannot be allocated. | |||||
|  */ | |||||
| int MAIN__(int argc, char *argv[]){ | |||||
|     FLOAT *a, *c; | |||||
|     FLOAT alpha[] = {1.0, 1.0}; | |||||
|     FLOAT beta [] = {1.0, 1.0}; | |||||
|     char *p; | |||||
|     char uplo='U'; | |||||
|     char trans='N'; | |||||
|     if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||||
|     if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||||
|     blasint m, i, j; | |||||
|     int from = 1; | |||||
|     int to = 200; | |||||
|     int step = 1; | |||||
|     struct timeval start, stop; | |||||
|     double time1; | |||||
|     argc--;argv++; | |||||
|     if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
|     if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
|     if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
|     /* A zero or negative step would never carry m past `to`. */ | |||||
|     if (step < 1) { | |||||
|         fprintf(stderr, "Step must be a positive integer.\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans); | |||||
|     /* Both buffers are sized once for the largest problem. */ | |||||
|     if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
|         fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
|     } | |||||
|     if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
|         fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
|     } | |||||
| #ifdef linux | |||||
|     srandom(getpid()); | |||||
| #endif | |||||
|     fprintf(stderr, " SIZE Flops\n"); | |||||
|     for(m = from; m <= to; m += step) | |||||
|     { | |||||
|         fprintf(stderr, " %6d : ", (int)m); | |||||
|         /* Refill the leading m x m part of each operand with values in [-0.5, 0.5]. */ | |||||
|         for(j = 0; j < m; j++){ | |||||
|             for(i = 0; i < m * COMPSIZE; i++){ | |||||
|                 a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
|                 c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
|             } | |||||
|         } | |||||
|         /* Only the library call itself sits between the two time stamps. */ | |||||
|         gettimeofday( &start, (struct timezone *)0); | |||||
|         SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m ); | |||||
|         gettimeofday( &stop, (struct timezone *)0); | |||||
|         time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
|         fprintf(stderr, | |||||
|                 " %10.2f MFlops\n", | |||||
|                 COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||||
|     } | |||||
|     return 0; | |||||
| } | |||||
| /* Weak alias: main resolves to MAIN__ unless another definition of main is linked in. Declared int to match MAIN__. */ | |||||
| int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||||
| @@ -0,0 +1,202 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2014, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #undef TRMM | |||||
| #ifndef COMPLEX | |||||
| #ifdef DOUBLE | |||||
| #define TRMM BLASFUNC(dtrmm) | |||||
| #else | |||||
| #define TRMM BLASFUNC(strmm) | |||||
| #endif | |||||
| #else | |||||
| #ifdef DOUBLE | |||||
| #define TRMM BLASFUNC(ztrmm) | |||||
| #else | |||||
| #define TRMM BLASFUNC(ctrmm) | |||||
| #endif | |||||
| #endif | |||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| /* | |||||
|  * gettimeofday() substitute, compiled only when __WIN32__ or __WIN64__ | |||||
|  * is defined. | |||||
|  * | |||||
|  * tv : receives the current time as seconds and microseconds since the | |||||
|  *      Unix epoch; left untouched when NULL. | |||||
|  * tz : ignored; present only to mirror the POSIX signature. | |||||
|  * | |||||
|  * Always returns 0. | |||||
|  */ | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
|     FILETIME ft; | |||||
|     unsigned __int64 tmpres; | |||||
|     (void)tz; /* no time zone data is ever produced */ | |||||
|     if (NULL == tv) return 0; | |||||
|     GetSystemTimeAsFileTime(&ft); | |||||
|     /* Join the two 32-bit halves into a single 64-bit tick count. */ | |||||
|     tmpres = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime; | |||||
|     /*converting file time to unix epoch*/ | |||||
|     tmpres /= 10; /*convert into microseconds*/ | |||||
|     tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
|     tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
|     tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
|     return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| /* | |||||
|  * Allocate a buffer from a SysV shared-memory segment created with | |||||
|  * SHM_HUGETLB. The requested size has HUGE_PAGESIZE added and is then | |||||
|  * masked with ~(HUGE_PAGESIZE - 1). IPC_RMID is issued on the segment | |||||
|  * right after it is attached. Prints a message and exits with status 1 | |||||
|  * when shmget() or shmat() fails; otherwise returns the attached address. | |||||
|  */ | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
|     void *mapping; | |||||
|     int segment = shmget(IPC_PRIVATE, | |||||
|                          (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
|                          SHM_HUGETLB | IPC_CREAT |0600); | |||||
|     if (segment < 0) { | |||||
|         printf( "Memory allocation failed(shmget).\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     mapping = shmat(segment, NULL, SHM_RND); | |||||
|     if ((BLASLONG)mapping == -1){ | |||||
|         printf( "Memory allocation failed(shmat).\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     shmctl(segment, IPC_RMID, 0); | |||||
|     return mapping; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| /* | |||||
|  * Benchmark driver for TRMM (strmm/dtrmm/ctrmm/ztrmm, selected by the | |||||
|  * COMPLEX and DOUBLE macros). | |||||
|  * | |||||
|  * Command line: [from [to [step]]] - the square problem sizes to time; | |||||
|  * defaults are 1, 200 and 1, and `to` is raised to `from` when smaller. | |||||
|  * Environment: the first character of OPENBLAS_SIDE, OPENBLAS_UPLO, | |||||
|  * OPENBLAS_TRANS and OPENBLAS_DIAG replaces the default side ('L'), | |||||
|  * uplo ('U'), trans ('N') and diag ('U') argument. | |||||
|  * | |||||
|  * For every size one line with the MFlops figure of a single timed call | |||||
|  * is written to stderr. Returns 0; exits with status 1 when the step is | |||||
|  * not positive or a buffer cannot be allocated. | |||||
|  */ | |||||
| int MAIN__(int argc, char *argv[]){ | |||||
|     FLOAT *a, *b; | |||||
|     FLOAT alpha[] = {1.0, 1.0}; | |||||
|     char *p; | |||||
|     char side ='L'; | |||||
|     char uplo ='U'; | |||||
|     char trans='N'; | |||||
|     char diag ='U'; | |||||
|     if ((p = getenv("OPENBLAS_SIDE"))) side=*p; | |||||
|     if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||||
|     if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||||
|     if ((p = getenv("OPENBLAS_DIAG"))) diag=*p; | |||||
|     blasint m, i, j; | |||||
|     int from = 1; | |||||
|     int to = 200; | |||||
|     int step = 1; | |||||
|     struct timeval start, stop; | |||||
|     double time1; | |||||
|     argc--;argv++; | |||||
|     if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
|     if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
|     if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
|     /* A zero or negative step would never carry m past `to`. */ | |||||
|     if (step < 1) { | |||||
|         fprintf(stderr, "Step must be a positive integer.\n"); | |||||
|         exit(1); | |||||
|     } | |||||
|     fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag); | |||||
|     /* Both buffers are sized once for the largest problem. */ | |||||
|     if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
|         fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
|     } | |||||
|     if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
|         fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
|     } | |||||
| #ifdef linux | |||||
|     srandom(getpid()); | |||||
| #endif | |||||
|     fprintf(stderr, " SIZE Flops\n"); | |||||
|     for(m = from; m <= to; m += step) | |||||
|     { | |||||
|         fprintf(stderr, " %6d : ", (int)m); | |||||
|         /* Refill the leading m x m part of each operand with values in [-0.5, 0.5]. */ | |||||
|         for(j = 0; j < m; j++){ | |||||
|             for(i = 0; i < m * COMPSIZE; i++){ | |||||
|                 a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
|                 b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
|             } | |||||
|         } | |||||
|         /* Only the library call itself sits between the two time stamps. */ | |||||
|         gettimeofday( &start, (struct timezone *)0); | |||||
|         TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); | |||||
|         gettimeofday( &stop, (struct timezone *)0); | |||||
|         time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
|         fprintf(stderr, | |||||
|                 " %10.2f MFlops\n", | |||||
|                 COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||||
|     } | |||||
|     return 0; | |||||
| } | |||||
| /* Weak alias: main resolves to MAIN__ unless another definition of main is linked in. Declared int to match MAIN__. */ | |||||
| int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||||
| @@ -0,0 +1,202 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2014, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| /* Bind TRSM to the BLAS triangular-solve routine matching the build's precision: COMPLEX selects c/z over s/d, DOUBLE selects d/z over s/c. Any earlier definition of TRSM is discarded first. */ | |||||
| #undef TRSM | |||||
| #ifndef COMPLEX | |||||
| #ifdef DOUBLE | |||||
| #define TRSM BLASFUNC(dtrsm) | |||||
| #else | |||||
| #define TRSM BLASFUNC(strsm) | |||||
| #endif | |||||
| #else | |||||
| #ifdef DOUBLE | |||||
| #define TRSM BLASFUNC(ztrsm) | |||||
| #else | |||||
| #define TRSM BLASFUNC(ctrsm) | |||||
| #endif | |||||
| #endif | |||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| /* | |||||
| * Windows stand-in for POSIX gettimeofday(). | |||||
| * | |||||
| * tv : receives the current system time as seconds and microseconds since | |||||
| * the Unix epoch; when NULL nothing is written. | |||||
| * tz : accepted only for signature compatibility and ignored. | |||||
| * | |||||
| * Always returns 0. | |||||
| */ | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres; | |||||
| (void)tz; /* time zone information is not provided by this shim */ | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| /* join the two 32-bit halves of the FILETIME into one 64-bit count */ | |||||
| tmpres = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| /* | |||||
| * Allocate `size` bytes from a private System V shared-memory segment | |||||
| * requested with SHM_HUGETLB, and return its attached address. | |||||
| * | |||||
| * The request is rounded to a multiple of HUGE_PAGESIZE (the mask arithmetic | |||||
| * assumes HUGE_PAGESIZE is a power of two -- TODO confirm). On any failure a | |||||
| * message is printed and the process exits with status 1, so the function | |||||
| * never returns NULL. | |||||
| * | |||||
| * NOTE: the enclosing #if ends in "&& 0", so this helper is currently | |||||
| * compiled out. | |||||
| */ | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| shmid = shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600); | |||||
| if (shmid < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| /* Mark the segment for removal before checking the attach result, so a failed shmat() does not leave the segment allocated after exit. */ | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| /* shmat() reports failure as (void *)-1, not NULL */ | |||||
| if (address == (void *)-1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| /* | |||||
| * TRSM benchmark driver. | |||||
| * | |||||
| * Command line: [from [to [step]]] -- square problem sizes to sweep | |||||
| * (defaults 1, 200, 1). `to` is raised to `from` when smaller; a step below 1 | |||||
| * is treated as 1 so the sweep always terminates. | |||||
| * Environment: OPENBLAS_SIDE, OPENBLAS_UPLO, OPENBLAS_TRANS, OPENBLAS_DIAG -- | |||||
| * the first character of each overrides the matching TRSM flag | |||||
| * (defaults 'L', 'U', 'N', 'U'). | |||||
| * | |||||
| * For every size m the two m x m operands are refilled with values in | |||||
| * [-0.5, 0.5], one TRSM call is timed with gettimeofday(), and | |||||
| * COMPSIZE * COMPSIZE * m^3 / seconds * 1e-6 is printed to stderr as MFlops. | |||||
| * | |||||
| * Returns 0; exits with status 1 if either operand buffer cannot be allocated. | |||||
| */ | |||||
| int MAIN__(int argc, char *argv[]){ | |||||
| FLOAT *a, *b; | |||||
| FLOAT alpha[] = {1.0, 1.0}; | |||||
| char *p; | |||||
| char side ='L'; | |||||
| char uplo ='U'; | |||||
| char trans='N'; | |||||
| char diag ='U'; | |||||
| if ((p = getenv("OPENBLAS_SIDE"))) side=*p; | |||||
| if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||||
| if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||||
| if ((p = getenv("OPENBLAS_DIAG"))) diag=*p; | |||||
| blasint m, i, j; | |||||
| int from = 1; | |||||
| int to = 200; | |||||
| int step = 1; | |||||
| struct timeval start, stop; | |||||
| double time1; | |||||
| argc--;argv++; | |||||
| if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
| if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
| if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
| /* a zero or negative step would keep "m <= to" true forever */ | |||||
| if (step < 1) step = 1; | |||||
| fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag); | |||||
| /* both operands are sized once for the largest problem in the sweep */ | |||||
| if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
| } | |||||
| if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
| } | |||||
| #ifdef linux | |||||
| srandom(getpid()); | |||||
| #endif | |||||
| fprintf(stderr, " SIZE Flops\n"); | |||||
| for(m = from; m <= to; m += step) | |||||
| { | |||||
| fprintf(stderr, " %6d : ", (int)m); | |||||
| /* refill the leading m x m part of both operands (leading dimension m) */ | |||||
| for(j = 0; j < m; j++){ | |||||
| for(i = 0; i < m * COMPSIZE; i++){ | |||||
| a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| } | |||||
| } | |||||
| /* only the TRSM call itself sits between the two timestamps */ | |||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); | |||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| fprintf(stderr, | |||||
| " %10.2f MFlops\n", | |||||
| COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| /* Expose MAIN__ under the symbol name main as a weak alias. NOTE(review): this declaration says void while MAIN__ returns int -- confirm the mismatch is intentional. */ | |||||
| void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||||
| @@ -1062,7 +1062,11 @@ int get_cpuname(void){ | |||||
| case 12: | case 12: | ||||
| case 15: | case 15: | ||||
| if(support_avx()) | if(support_avx()) | ||||
| #ifndef NO_AVX2 | |||||
| return CPUTYPE_HASWELL; | return CPUTYPE_HASWELL; | ||||
| #else | |||||
| return CPUTYPE_SANDYBRIDGE; | |||||
| #endif | |||||
| else | else | ||||
| return CPUTYPE_NEHALEM; | return CPUTYPE_NEHALEM; | ||||
| } | } | ||||
| @@ -1072,7 +1076,11 @@ int get_cpuname(void){ | |||||
| case 5: | case 5: | ||||
| case 6: | case 6: | ||||
| if(support_avx()) | if(support_avx()) | ||||
| #ifndef NO_AVX2 | |||||
| return CPUTYPE_HASWELL; | return CPUTYPE_HASWELL; | ||||
| #else | |||||
| return CPUTYPE_SANDYBRIDGE; | |||||
| #endif | |||||
| else | else | ||||
| return CPUTYPE_NEHALEM; | return CPUTYPE_NEHALEM; | ||||
| } | } | ||||
| @@ -1471,7 +1479,11 @@ int get_coretype(void){ | |||||
| case 12: | case 12: | ||||
| case 15: | case 15: | ||||
| if(support_avx()) | if(support_avx()) | ||||
| #ifndef NO_AVX2 | |||||
| return CORE_HASWELL; | return CORE_HASWELL; | ||||
| #else | |||||
| return CORE_SANDYBRIDGE; | |||||
| #endif | |||||
| else | else | ||||
| return CORE_NEHALEM; | return CORE_NEHALEM; | ||||
| } | } | ||||
| @@ -1481,7 +1493,11 @@ int get_coretype(void){ | |||||
| case 5: | case 5: | ||||
| case 6: | case 6: | ||||
| if(support_avx()) | if(support_avx()) | ||||
| #ifndef NO_AVX2 | |||||
| return CORE_HASWELL; | return CORE_HASWELL; | ||||
| #else | |||||
| return CORE_SANDYBRIDGE; | |||||
| #endif | |||||
| else | else | ||||
| return CORE_NEHALEM; | return CORE_NEHALEM; | ||||
| } | } | ||||
| @@ -66,7 +66,11 @@ extern gotoblas_t gotoblas_BOBCAT; | |||||
| extern gotoblas_t gotoblas_SANDYBRIDGE; | extern gotoblas_t gotoblas_SANDYBRIDGE; | ||||
| extern gotoblas_t gotoblas_BULLDOZER; | extern gotoblas_t gotoblas_BULLDOZER; | ||||
| extern gotoblas_t gotoblas_PILEDRIVER; | extern gotoblas_t gotoblas_PILEDRIVER; | ||||
| #ifdef NO_AVX2 | |||||
| #define gotoblas_HASWELL gotoblas_SANDYBRIDGE | |||||
| #else | |||||
| extern gotoblas_t gotoblas_HASWELL; | extern gotoblas_t gotoblas_HASWELL; | ||||
| #endif | |||||
| #else | #else | ||||
| //Use NEHALEM kernels for sandy bridge | //Use NEHALEM kernels for sandy bridge | ||||
| #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | ||||
| @@ -356,25 +356,25 @@ ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS) | |||||
| XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) | XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) | ||||
| #SLAPACKOBJS = \ | #SLAPACKOBJS = \ | ||||
| # sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \ | |||||
| # spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \ | |||||
| # slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \ | |||||
| # sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ | |||||
| # spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \ | |||||
| # slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX) | |||||
| SLAPACKOBJS = \ | SLAPACKOBJS = \ | ||||
| sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ | sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ | ||||
| spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \ | spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \ | ||||
| slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX) | |||||
| slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) | |||||
| #DLAPACKOBJS = \ | #DLAPACKOBJS = \ | ||||
| # dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \ | |||||
| # dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \ | |||||
| # dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \ | |||||
| # dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \ | |||||
| # dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \ | |||||
| # dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX) | |||||
| DLAPACKOBJS = \ | DLAPACKOBJS = \ | ||||
| dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \ | dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \ | ||||
| dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \ | dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \ | ||||
| dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX) | |||||
| dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) | |||||
| QLAPACKOBJS = \ | QLAPACKOBJS = \ | ||||
| @@ -382,28 +382,29 @@ QLAPACKOBJS = \ | |||||
| qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \ | qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \ | ||||
| qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \ | qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \ | ||||
| #CLAPACKOBJS = \ | #CLAPACKOBJS = \ | ||||
| # cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \ | |||||
| # cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \ | |||||
| # claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \ | |||||
| # cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ | |||||
| # cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ | |||||
| # clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX) | |||||
| CLAPACKOBJS = \ | CLAPACKOBJS = \ | ||||
| cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ | cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ | ||||
| cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ | cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ | ||||
| clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX) | |||||
| clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) | |||||
| #ZLAPACKOBJS = \ | #ZLAPACKOBJS = \ | ||||
| # zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \ | |||||
| # zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \ | |||||
| # zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \ | |||||
| # zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ | |||||
| # zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ | |||||
| # zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX) | |||||
| ZLAPACKOBJS = \ | ZLAPACKOBJS = \ | ||||
| zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ | zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ | ||||
| zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ | zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ | ||||
| zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX) | |||||
| zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) | |||||
| XLAPACKOBJS = \ | XLAPACKOBJS = \ | ||||
| @@ -1,5 +1,7 @@ | |||||
| SGEMVNKERNEL = ../arm/gemv_n.c | SGEMVNKERNEL = ../arm/gemv_n.c | ||||
| SGEMVTKERNEL = ../arm/gemv_t.c | SGEMVTKERNEL = ../arm/gemv_t.c | ||||
| CGEMVNKERNEL = ../arm/zgemv_n.c | |||||
| CGEMVTKERNEL = ../arm/zgemv_t.c | |||||
| DGEMVNKERNEL = ../arm/gemv_n.c | DGEMVNKERNEL = ../arm/gemv_n.c | ||||
| DGEMVTKERNEL = ../arm/gemv_t.c | DGEMVTKERNEL = ../arm/gemv_t.c | ||||
| @@ -96,12 +98,12 @@ ZSWAPKERNEL = swap_vfp.S | |||||
| # BAD SGEMVNKERNEL = gemv_n_vfp.S | # BAD SGEMVNKERNEL = gemv_n_vfp.S | ||||
| # BAD DGEMVNKERNEL = gemv_n_vfp.S | # BAD DGEMVNKERNEL = gemv_n_vfp.S | ||||
| CGEMVNKERNEL = cgemv_n_vfp.S | |||||
| # CGEMVNKERNEL = cgemv_n_vfp.S | |||||
| ZGEMVNKERNEL = zgemv_n_vfp.S | ZGEMVNKERNEL = zgemv_n_vfp.S | ||||
| # BAD SGEMVTKERNEL = gemv_t_vfp.S | # BAD SGEMVTKERNEL = gemv_t_vfp.S | ||||
| # BAD DGEMVTKERNEL = gemv_t_vfp.S | # BAD DGEMVTKERNEL = gemv_t_vfp.S | ||||
| CGEMVTKERNEL = cgemv_t_vfp.S | |||||
| # CGEMVTKERNEL = cgemv_t_vfp.S | |||||
| ZGEMVTKERNEL = zgemv_t_vfp.S | ZGEMVTKERNEL = zgemv_t_vfp.S | ||||
| STRMMKERNEL = strmm_kernel_4x2_vfp.S | STRMMKERNEL = strmm_kernel_4x2_vfp.S | ||||
| @@ -1,5 +1,7 @@ | |||||
| SGEMVNKERNEL = ../arm/gemv_n.c | SGEMVNKERNEL = ../arm/gemv_n.c | ||||
| SGEMVTKERNEL = ../arm/gemv_t.c | SGEMVTKERNEL = ../arm/gemv_t.c | ||||
| CGEMVNKERNEL = ../arm/zgemv_n.c | |||||
| CGEMVTKERNEL = ../arm/zgemv_t.c | |||||
| ################################################################################# | ################################################################################# | ||||
| @@ -77,12 +79,12 @@ ZSCALKERNEL = zscal.c | |||||
| # BAD SGEMVNKERNEL = gemv_n_vfp.S | # BAD SGEMVNKERNEL = gemv_n_vfp.S | ||||
| DGEMVNKERNEL = gemv_n_vfp.S | DGEMVNKERNEL = gemv_n_vfp.S | ||||
| CGEMVNKERNEL = cgemv_n_vfp.S | |||||
| #CGEMVNKERNEL = cgemv_n_vfp.S | |||||
| ZGEMVNKERNEL = zgemv_n_vfp.S | ZGEMVNKERNEL = zgemv_n_vfp.S | ||||
| # BAD SGEMVTKERNEL = gemv_t_vfp.S | # BAD SGEMVTKERNEL = gemv_t_vfp.S | ||||
| DGEMVTKERNEL = gemv_t_vfp.S | DGEMVTKERNEL = gemv_t_vfp.S | ||||
| CGEMVTKERNEL = cgemv_t_vfp.S | |||||
| #CGEMVTKERNEL = cgemv_t_vfp.S | |||||
| ZGEMVTKERNEL = zgemv_t_vfp.S | ZGEMVTKERNEL = zgemv_t_vfp.S | ||||
| STRMMKERNEL = strmm_kernel_4x4_vfpv3.S | STRMMKERNEL = strmm_kernel_4x4_vfpv3.S | ||||
| @@ -1,263 +1,3 @@ | |||||
| GEMVDEP = ../l2param.h | |||||
| ifdef HAVE_SSE | |||||
| ifndef SAMAXKERNEL | |||||
| SAMAXKERNEL = amax_sse.S | |||||
| endif | |||||
| ifndef CAMAXKERNEL | |||||
| CAMAXKERNEL = zamax_sse.S | |||||
| endif | |||||
| ifndef SAMINKERNEL | |||||
| SAMINKERNEL = amax_sse.S | |||||
| endif | |||||
| ifndef CAMINKERNEL | |||||
| CAMINKERNEL = zamax_sse.S | |||||
| endif | |||||
| ifndef ISAMAXKERNEL | |||||
| ISAMAXKERNEL = iamax_sse.S | |||||
| endif | |||||
| ifndef ICAMAXKERNEL | |||||
| ICAMAXKERNEL = izamax_sse.S | |||||
| endif | |||||
| ifndef ISAMINKERNEL | |||||
| ISAMINKERNEL = iamax_sse.S | |||||
| endif | |||||
| ifndef ICAMINKERNEL | |||||
| ICAMINKERNEL = izamax_sse.S | |||||
| endif | |||||
| ifndef ISMAXKERNEL | |||||
| ISMAXKERNEL = iamax_sse.S | |||||
| endif | |||||
| ifndef ISMINKERNEL | |||||
| ISMINKERNEL = iamax_sse.S | |||||
| endif | |||||
| ifndef SMAXKERNEL | |||||
| SMAXKERNEL = amax_sse.S | |||||
| endif | |||||
| ifndef SMINKERNEL | |||||
| SMINKERNEL = amax_sse.S | |||||
| endif | |||||
| ifndef SASUMKERNEL | |||||
| SASUMKERNEL = asum_sse.S | |||||
| endif | |||||
| ifndef CASUMKERNEL | |||||
| CASUMKERNEL = zasum_sse.S | |||||
| endif | |||||
| ifndef SDOTKERNEL | |||||
| SDOTKERNEL = ../arm/dot.c | |||||
| endif | |||||
| ifndef CDOTKERNEL | |||||
| CDOTKERNEL = zdot_sse.S | |||||
| endif | |||||
| ifndef SCOPYKERNEL | |||||
| SCOPYKERNEL = copy_sse.S | |||||
| endif | |||||
| ifndef CCOPYKERNEL | |||||
| CCOPYKERNEL = zcopy_sse.S | |||||
| endif | |||||
| ifndef SSACALKERNEL | |||||
| SSCALKERNEL = scal_sse.S | |||||
| endif | |||||
| ifndef CSACALKERNEL | |||||
| CSCALKERNEL = zscal_sse.S | |||||
| endif | |||||
| ifndef SAXPYKERNEL | |||||
| SAXPYKERNEL = axpy_sse.S | |||||
| endif | |||||
| ifndef CAXPYKERNEL | |||||
| CAXPYKERNEL = zaxpy_sse.S | |||||
| endif | |||||
| ifndef SROTKERNEL | |||||
| SROTKERNEL = rot_sse.S | |||||
| endif | |||||
| ifndef CROTKERNEL | |||||
| CROTKERNEL = zrot_sse.S | |||||
| endif | |||||
| ifndef SSWAPKERNEL | |||||
| SSWAPKERNEL = swap_sse.S | |||||
| endif | |||||
| ifndef CSWAPKERNEL | |||||
| CSWAPKERNEL = zswap_sse.S | |||||
| endif | |||||
| ifndef SGEMVNKERNEL | |||||
| SGEMVNKERNEL = ../arm/gemv_n.c | |||||
| endif | |||||
| ifndef SGEMVTKERNEL | |||||
| SGEMVTKERNEL = ../arm/gemv_t.c | |||||
| endif | |||||
| ifndef CGEMVNKERNEL | |||||
| CGEMVNKERNEL = zgemv_n_sse.S | |||||
| endif | |||||
| ifndef CGEMVTKERNEL | |||||
| CGEMVTKERNEL = zgemv_t_sse.S | |||||
| endif | |||||
| endif | |||||
| ifdef HAVE_SSE2 | |||||
| ifndef DAMAXKERNEL | |||||
| DAMAXKERNEL = amax_sse2.S | |||||
| endif | |||||
| ifndef ZAMAXKERNEL | |||||
| ZAMAXKERNEL = zamax_sse2.S | |||||
| endif | |||||
| ifndef DAMINKERNEL | |||||
| DAMINKERNEL = amax_sse2.S | |||||
| endif | |||||
| ifndef ZAMINKERNEL | |||||
| ZAMINKERNEL = zamax_sse2.S | |||||
| endif | |||||
| ifndef IDAMAXKERNEL | |||||
| IDAMAXKERNEL = iamax_sse2.S | |||||
| endif | |||||
| ifndef IZAMAXKERNEL | |||||
| IZAMAXKERNEL = izamax_sse2.S | |||||
| endif | |||||
| ifndef IDAMINKERNEL | |||||
| IDAMINKERNEL = iamax_sse2.S | |||||
| endif | |||||
| ifndef IZAMINKERNEL | |||||
| IZAMINKERNEL = izamax_sse2.S | |||||
| endif | |||||
| ifndef IDMAXKERNEL | |||||
| IDMAXKERNEL = iamax_sse2.S | |||||
| endif | |||||
| ifndef IDMINKERNEL | |||||
| IDMINKERNEL = iamax_sse2.S | |||||
| endif | |||||
| ifndef DMAXKERNEL | |||||
| DMAXKERNEL = amax_sse2.S | |||||
| endif | |||||
| ifndef DMINKERNEL | |||||
| DMINKERNEL = amax_sse2.S | |||||
| endif | |||||
| ifndef DDOTKERNEL | |||||
| DDOTKERNEL = dot_sse2.S | |||||
| endif | |||||
| ifndef ZDOTKERNEL | |||||
| ZDOTKERNEL = zdot_sse2.S | |||||
| endif | |||||
| ifndef DCOPYKERNEL | |||||
| # DCOPYKERNEL = copy_sse2.S | |||||
| endif | |||||
| ifndef ZCOPYKERNEL | |||||
| ZCOPYKERNEL = zcopy_sse2.S | |||||
| endif | |||||
| ifndef DSACALKERNEL | |||||
| DSCALKERNEL = scal_sse2.S | |||||
| endif | |||||
| ifndef ZSACALKERNEL | |||||
| ZSCALKERNEL = zscal_sse2.S | |||||
| endif | |||||
| ifndef DASUMKERNEL | |||||
| DASUMKERNEL = asum_sse2.S | |||||
| endif | |||||
| ifndef ZASUMKERNEL | |||||
| ZASUMKERNEL = zasum_sse2.S | |||||
| endif | |||||
| ifndef DAXPYKERNEL | |||||
| DAXPYKERNEL = axpy_sse2.S | |||||
| endif | |||||
| ifndef ZAXPYKERNEL | |||||
| ZAXPYKERNEL = zaxpy_sse2.S | |||||
| endif | |||||
| ifndef SNRM2KERNEL | |||||
| SNRM2KERNEL = nrm2_sse.S | |||||
| endif | |||||
| ifndef CNRM2KERNEL | |||||
| CNRM2KERNEL = znrm2_sse.S | |||||
| endif | |||||
| ifndef DROTKERNEL | |||||
| DROTKERNEL = rot_sse2.S | |||||
| endif | |||||
| ifndef ZROTKERNEL | |||||
| ZROTKERNEL = zrot_sse2.S | |||||
| endif | |||||
| ifndef DSWAPKERNEL | |||||
| DSWAPKERNEL = swap_sse2.S | |||||
| endif | |||||
| ifndef ZSWAPKERNEL | |||||
| ZSWAPKERNEL = zswap_sse2.S | |||||
| endif | |||||
| ifndef DGEMVNKERNEL | |||||
| DGEMVNKERNEL = gemv_n_sse2.S | |||||
| endif | |||||
| ifndef DGEMVTKERNEL | |||||
| DGEMVTKERNEL = gemv_t_sse2.S | |||||
| endif | |||||
| ifndef ZGEMVNKERNEL | |||||
| ZGEMVNKERNEL = zgemv_n_sse2.S | |||||
| endif | |||||
| ifndef ZGEMVTKERNEL | |||||
| ZGEMVTKERNEL = zgemv_t_sse2.S | |||||
| endif | |||||
| endif | |||||
| ifndef SAMINKERNEL | ifndef SAMINKERNEL | ||||
| SAMINKERNEL = amax.S | SAMINKERNEL = amax.S | ||||
| endif | endif | ||||
| @@ -394,21 +134,41 @@ XGEMMITCOPYOBJ = | |||||
| XGEMMONCOPYOBJ = xgemm_oncopy$(TSUFFIX).$(SUFFIX) | XGEMMONCOPYOBJ = xgemm_oncopy$(TSUFFIX).$(SUFFIX) | ||||
| XGEMMOTCOPYOBJ = xgemm_otcopy$(TSUFFIX).$(SUFFIX) | XGEMMOTCOPYOBJ = xgemm_otcopy$(TSUFFIX).$(SUFFIX) | ||||
| SGEMM_BETA = gemm_beta.S | |||||
| DGEMM_BETA = gemm_beta.S | |||||
| QGEMM_BETA = ../generic/gemm_beta.c | |||||
| CGEMM_BETA = zgemm_beta.S | |||||
| ZGEMM_BETA = zgemm_beta.S | |||||
| XGEMM_BETA = ../generic/zgemm_beta.c | |||||
| QTRSMKERNEL_LN = qtrsm_kernel_LN_2x2.S | |||||
| QTRSMKERNEL_LT = qtrsm_kernel_LT_2x2.S | |||||
| QTRSMKERNEL_RN = qtrsm_kernel_LT_2x2.S | |||||
| QTRSMKERNEL_RT = qtrsm_kernel_RT_2x2.S | |||||
| QTRSMKERNEL_LN = qtrsm_kernel_LN_2x2.S | |||||
| QTRSMKERNEL_LT = qtrsm_kernel_LT_2x2.S | |||||
| QTRSMKERNEL_RN = qtrsm_kernel_LT_2x2.S | |||||
| QTRSMKERNEL_RT = qtrsm_kernel_RT_2x2.S | |||||
| XTRSMKERNEL_LN = xtrsm_kernel_LT_1x1.S | |||||
| XTRSMKERNEL_LT = xtrsm_kernel_LT_1x1.S | |||||
| XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S | |||||
| XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S | |||||
| XTRSMKERNEL_LN = xtrsm_kernel_LT_1x1.S | |||||
| XTRSMKERNEL_LT = xtrsm_kernel_LT_1x1.S | |||||
| XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S | |||||
| XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S | |||||
| XGEMM3MKERNEL = xgemm3m_kernel_2x2.S | XGEMM3MKERNEL = xgemm3m_kernel_2x2.S | ||||
| # bug in zdot assembler kernel | |||||
| ifndef ZDOTKERNEL | |||||
| ZDOTKERNEL = ../arm/zdot.c | |||||
| endif | |||||
| DSDOTKERNEL = ../arm/dot.c | |||||
| # Bug in znrm2 assembler kernel | |||||
| ifndef ZNRM2KERNEL | |||||
| ZNRM2KERNEL = ../arm/znrm2.c | |||||
| endif | |||||
| # Bug in zgemv_t assembler kernel | |||||
| ifndef ZGEMVTKERNEL | |||||
| ZGEMVTKERNEL = ../arm/zgemv_t.c | |||||
| endif | |||||
| SGEMM_BETA = ../generic/gemm_beta.c | |||||
| DGEMM_BETA = ../generic/gemm_beta.c | |||||
| CGEMM_BETA = ../generic/zgemm_beta.c | |||||
| ZGEMM_BETA = ../generic/zgemm_beta.c | |||||
| QGEMM_BETA = ../generic/gemm_beta.c | |||||
| XGEMM_BETA = ../generic/zgemm_beta.c | |||||
| @@ -1,6 +1,3 @@ | |||||
| SGEMVNKERNEL = sgemv_n.S | |||||
| SGEMVTKERNEL = sgemv_t.S | |||||
| ZGEMVNKERNEL = zgemv_n_dup.S | ZGEMVNKERNEL = zgemv_n_dup.S | ||||
| ZGEMVTKERNEL = zgemv_t.S | ZGEMVTKERNEL = zgemv_t.S | ||||
| @@ -1,5 +1,3 @@ | |||||
| SGEMVNKERNEL = sgemv_n.S | |||||
| SGEMVTKERNEL = sgemv_t.S | |||||
| ZGEMVNKERNEL = zgemv_n_dup.S | ZGEMVNKERNEL = zgemv_n_dup.S | ||||
| ZGEMVTKERNEL = zgemv_t.S | ZGEMVTKERNEL = zgemv_t.S | ||||
| @@ -1,6 +1,3 @@ | |||||
| SGEMVNKERNEL = sgemv_n.S | |||||
| SGEMVTKERNEL = sgemv_t.S | |||||
| SGEMMKERNEL = sgemm_kernel_16x4_haswell.S | SGEMMKERNEL = sgemm_kernel_16x4_haswell.S | ||||
| SGEMMINCOPY = ../generic/gemm_ncopy_16.c | SGEMMINCOPY = ../generic/gemm_ncopy_16.c | ||||
| @@ -1,6 +1,3 @@ | |||||
| SGEMVNKERNEL = sgemv_n.S | |||||
| SGEMVTKERNEL = sgemv_t.S | |||||
| SGEMMKERNEL = gemm_kernel_4x8_nehalem.S | SGEMMKERNEL = gemm_kernel_4x8_nehalem.S | ||||
| SGEMMINCOPY = gemm_ncopy_4.S | SGEMMINCOPY = gemm_ncopy_4.S | ||||
| @@ -1,5 +1,3 @@ | |||||
| SGEMVNKERNEL = sgemv_n.S | |||||
| SGEMVTKERNEL = sgemv_t.S | |||||
| ZGEMVNKERNEL = zgemv_n_dup.S | ZGEMVNKERNEL = zgemv_n_dup.S | ||||
| ZGEMVTKERNEL = zgemv_t.S | ZGEMVTKERNEL = zgemv_t.S | ||||
| @@ -1,5 +1,3 @@ | |||||
| SGEMVNKERNEL = sgemv_n.S | |||||
| SGEMVTKERNEL = sgemv_t.S | |||||
| SGEMMKERNEL = sgemm_kernel_16x4_sandy.S | SGEMMKERNEL = sgemm_kernel_16x4_sandy.S | ||||
| SGEMMINCOPY = ../generic/gemm_ncopy_16.c | SGEMMINCOPY = ../generic/gemm_ncopy_16.c | ||||
| @@ -155,7 +155,7 @@ SLASRC = \ | |||||
| sbbcsd.o slapmr.o sorbdb.o sorbdb1.o sorbdb2.o sorbdb3.o sorbdb4.o \ | sbbcsd.o slapmr.o sorbdb.o sorbdb1.o sorbdb2.o sorbdb3.o sorbdb4.o \ | ||||
| sorbdb5.o sorbdb6.o sorcsd.o sorcsd2by1.o \ | sorbdb5.o sorbdb6.o sorcsd.o sorcsd2by1.o \ | ||||
| sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \ | sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \ | ||||
| stpqrt.o stpqrt2.o stpmqrt.o stprfb.o | |||||
| stpqrt.o stpqrt2.o stpmqrt.o stprfb.o spotri.o | |||||
| DSLASRC = spotrs.o | DSLASRC = spotrs.o | ||||
| @@ -236,7 +236,7 @@ CLASRC = \ | |||||
| cbbcsd.o clapmr.o cunbdb.o cunbdb1.o cunbdb2.o cunbdb3.o cunbdb4.o \ | cbbcsd.o clapmr.o cunbdb.o cunbdb1.o cunbdb2.o cunbdb3.o cunbdb4.o \ | ||||
| cunbdb5.o cunbdb6.o cuncsd.o cuncsd2by1.o \ | cunbdb5.o cunbdb6.o cuncsd.o cuncsd2by1.o \ | ||||
| cgeqrt.o cgeqrt2.o cgeqrt3.o cgemqrt.o \ | cgeqrt.o cgeqrt2.o cgeqrt3.o cgemqrt.o \ | ||||
| ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o | |||||
| ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o cpotri.o | |||||
| ifdef USEXBLAS | ifdef USEXBLAS | ||||
| CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \ | CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \ | ||||
| @@ -316,7 +316,7 @@ DLASRC = \ | |||||
| dbbcsd.o dlapmr.o dorbdb.o dorbdb1.o dorbdb2.o dorbdb3.o dorbdb4.o \ | dbbcsd.o dlapmr.o dorbdb.o dorbdb1.o dorbdb2.o dorbdb3.o dorbdb4.o \ | ||||
| dorbdb5.o dorbdb6.o dorcsd.o dorcsd2by1.o \ | dorbdb5.o dorbdb6.o dorcsd.o dorcsd2by1.o \ | ||||
| dgeqrt.o dgeqrt2.o dgeqrt3.o dgemqrt.o \ | dgeqrt.o dgeqrt2.o dgeqrt3.o dgemqrt.o \ | ||||
| dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o | |||||
| dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o dpotri.o | |||||
| ifdef USEXBLAS | ifdef USEXBLAS | ||||
| DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ | DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ | ||||
| @@ -400,7 +400,7 @@ ZLASRC = \ | |||||
| zbbcsd.o zlapmr.o zunbdb.o zunbdb1.o zunbdb2.o zunbdb3.o zunbdb4.o \ | zbbcsd.o zlapmr.o zunbdb.o zunbdb1.o zunbdb2.o zunbdb3.o zunbdb4.o \ | ||||
| zunbdb5.o zunbdb6.o zuncsd.o zuncsd2by1.o \ | zunbdb5.o zunbdb6.o zuncsd.o zuncsd2by1.o \ | ||||
| zgeqrt.o zgeqrt2.o zgeqrt3.o zgemqrt.o \ | zgeqrt.o zgeqrt2.o zgeqrt3.o zgemqrt.o \ | ||||
| ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o | |||||
| ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o zpotri.o | |||||
| ifdef USEXBLAS | ifdef USEXBLAS | ||||
| ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \ | ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \ | ||||
| @@ -321,24 +321,24 @@ | |||||
| * | * | ||||
| MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) | MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) | ||||
| * Compute space needed for CGEQRF | * Compute space needed for CGEQRF | ||||
| CALL CGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_CGEQRF=DUM(1) | |||||
| CALL CGEQRF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_CGEQRF=CDUM(1) | |||||
| * Compute space needed for CUNGQR | * Compute space needed for CUNGQR | ||||
| CALL CUNGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_CUNGQR_N=DUM(1) | |||||
| CALL CUNGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_CUNGQR_M=DUM(1) | |||||
| CALL CUNGQR( M, N, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_CUNGQR_N=CDUM(1) | |||||
| CALL CUNGQR( M, M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_CUNGQR_M=CDUM(1) | |||||
| * Compute space needed for CGEBRD | * Compute space needed for CGEBRD | ||||
| CALL CGEBRD( N, N, A, LDA, S, DUM(1), DUM(1), | |||||
| $ DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_CGEBRD=DUM(1) | |||||
| CALL CGEBRD( N, N, A, LDA, S, DUM(1), CDUM(1), | |||||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_CGEBRD=CDUM(1) | |||||
| * Compute space needed for CUNGBR | * Compute space needed for CUNGBR | ||||
| CALL CUNGBR( 'P', N, N, N, A, LDA, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_P=DUM(1) | |||||
| CALL CUNGBR( 'Q', N, N, N, A, LDA, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_Q=DUM(1) | |||||
| CALL CUNGBR( 'P', N, N, N, A, LDA, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_P=CDUM(1) | |||||
| CALL CUNGBR( 'Q', N, N, N, A, LDA, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_Q=CDUM(1) | |||||
| * | * | ||||
| MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) | MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) | ||||
| IF( M.GE.MNTHR ) THEN | IF( M.GE.MNTHR ) THEN | ||||
| @@ -444,20 +444,20 @@ | |||||
| * | * | ||||
| * Path 10 (M at least N, but not much larger) | * Path 10 (M at least N, but not much larger) | ||||
| * | * | ||||
| CALL CGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), | |||||
| $ DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_CGEBRD=DUM(1) | |||||
| CALL CGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1), | |||||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_CGEBRD=CDUM(1) | |||||
| MAXWRK = 2*N + LWORK_CGEBRD | MAXWRK = 2*N + LWORK_CGEBRD | ||||
| IF( WNTUS .OR. WNTUO ) THEN | IF( WNTUS .OR. WNTUO ) THEN | ||||
| CALL CUNGBR( 'Q', M, N, N, A, LDA, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_Q=DUM(1) | |||||
| CALL CUNGBR( 'Q', M, N, N, A, LDA, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_Q=CDUM(1) | |||||
| MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_Q ) | MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_Q ) | ||||
| END IF | END IF | ||||
| IF( WNTUA ) THEN | IF( WNTUA ) THEN | ||||
| CALL CUNGBR( 'Q', M, M, N, A, LDA, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_Q=DUM(1) | |||||
| CALL CUNGBR( 'Q', M, M, N, A, LDA, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_Q=CDUM(1) | |||||
| MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_Q ) | MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_Q ) | ||||
| END IF | END IF | ||||
| IF( .NOT.WNTVN ) THEN | IF( .NOT.WNTVN ) THEN | ||||
| @@ -471,25 +471,26 @@ | |||||
| * | * | ||||
| MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) | MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) | ||||
| * Compute space needed for CGELQF | * Compute space needed for CGELQF | ||||
| CALL CGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_CGELQF=DUM(1) | |||||
| CALL CGELQF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_CGELQF=CDUM(1) | |||||
| * Compute space needed for CUNGLQ | * Compute space needed for CUNGLQ | ||||
| CALL CUNGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_CUNGLQ_N=DUM(1) | |||||
| CALL CUNGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_CUNGLQ_M=DUM(1) | |||||
| CALL CUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), -1, | |||||
| $ IERR ) | |||||
| LWORK_CUNGLQ_N=CDUM(1) | |||||
| CALL CUNGLQ( M, N, M, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_CUNGLQ_M=CDUM(1) | |||||
| * Compute space needed for CGEBRD | * Compute space needed for CGEBRD | ||||
| CALL CGEBRD( M, M, A, LDA, S, DUM(1), DUM(1), | |||||
| $ DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_CGEBRD=DUM(1) | |||||
| CALL CGEBRD( M, M, A, LDA, S, DUM(1), CDUM(1), | |||||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_CGEBRD=CDUM(1) | |||||
| * Compute space needed for CUNGBR P | * Compute space needed for CUNGBR P | ||||
| CALL CUNGBR( 'P', M, M, M, A, N, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_P=DUM(1) | |||||
| CALL CUNGBR( 'P', M, M, M, A, N, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_P=CDUM(1) | |||||
| * Compute space needed for CUNGBR Q | * Compute space needed for CUNGBR Q | ||||
| CALL CUNGBR( 'Q', M, M, M, A, N, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_Q=DUM(1) | |||||
| CALL CUNGBR( 'Q', M, M, M, A, N, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_Q=CDUM(1) | |||||
| IF( N.GE.MNTHR ) THEN | IF( N.GE.MNTHR ) THEN | ||||
| IF( WNTVN ) THEN | IF( WNTVN ) THEN | ||||
| * | * | ||||
| @@ -593,21 +594,21 @@ | |||||
| * | * | ||||
| * Path 10t(N greater than M, but not much larger) | * Path 10t(N greater than M, but not much larger) | ||||
| * | * | ||||
| CALL CGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), | |||||
| $ DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_CGEBRD=DUM(1) | |||||
| CALL CGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1), | |||||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_CGEBRD=CDUM(1) | |||||
| MAXWRK = 2*M + LWORK_CGEBRD | MAXWRK = 2*M + LWORK_CGEBRD | ||||
| IF( WNTVS .OR. WNTVO ) THEN | IF( WNTVS .OR. WNTVO ) THEN | ||||
| * Compute space needed for CUNGBR P | * Compute space needed for CUNGBR P | ||||
| CALL CUNGBR( 'P', M, N, M, A, N, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_P=DUM(1) | |||||
| CALL CUNGBR( 'P', M, N, M, A, N, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_P=CDUM(1) | |||||
| MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_P ) | MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_P ) | ||||
| END IF | END IF | ||||
| IF( WNTVA ) THEN | IF( WNTVA ) THEN | ||||
| CALL CUNGBR( 'P', N, N, M, A, N, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_P=DUM(1) | |||||
| CALL CUNGBR( 'P', N, N, M, A, N, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_CUNGBR_P=CDUM(1) | |||||
| MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_P ) | MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_P ) | ||||
| END IF | END IF | ||||
| IF( .NOT.WNTUN ) THEN | IF( .NOT.WNTUN ) THEN | ||||
| @@ -286,7 +286,7 @@ | |||||
| CLANHF = ZERO | CLANHF = ZERO | ||||
| RETURN | RETURN | ||||
| ELSE IF( N.EQ.1 ) THEN | ELSE IF( N.EQ.1 ) THEN | ||||
| CLANHF = ABS( A(0) ) | |||||
| CLANHF = ABS(REAL(A(0))) | |||||
| RETURN | RETURN | ||||
| END IF | END IF | ||||
| * | * | ||||
| @@ -321,24 +321,24 @@ | |||||
| * | * | ||||
| MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 ) | MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 ) | ||||
| * Compute space needed for ZGEQRF | * Compute space needed for ZGEQRF | ||||
| CALL ZGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_ZGEQRF=DUM(1) | |||||
| CALL ZGEQRF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_ZGEQRF=CDUM(1) | |||||
| * Compute space needed for ZUNGQR | * Compute space needed for ZUNGQR | ||||
| CALL ZUNGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_ZUNGQR_N=DUM(1) | |||||
| CALL ZUNGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_ZUNGQR_M=DUM(1) | |||||
| CALL ZUNGQR( M, N, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_ZUNGQR_N=CDUM(1) | |||||
| CALL ZUNGQR( M, M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_ZUNGQR_M=CDUM(1) | |||||
| * Compute space needed for ZGEBRD | * Compute space needed for ZGEBRD | ||||
| CALL ZGEBRD( N, N, A, LDA, S, DUM(1), DUM(1), | |||||
| $ DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_ZGEBRD=DUM(1) | |||||
| CALL ZGEBRD( N, N, A, LDA, S, DUM(1), CDUM(1), | |||||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_ZGEBRD=CDUM(1) | |||||
| * Compute space needed for ZUNGBR | * Compute space needed for ZUNGBR | ||||
| CALL ZUNGBR( 'P', N, N, N, A, LDA, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_P=DUM(1) | |||||
| CALL ZUNGBR( 'Q', N, N, N, A, LDA, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_Q=DUM(1) | |||||
| CALL ZUNGBR( 'P', N, N, N, A, LDA, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_P=CDUM(1) | |||||
| CALL ZUNGBR( 'Q', N, N, N, A, LDA, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_Q=CDUM(1) | |||||
| * | * | ||||
| IF( M.GE.MNTHR ) THEN | IF( M.GE.MNTHR ) THEN | ||||
| IF( WNTUN ) THEN | IF( WNTUN ) THEN | ||||
| @@ -443,20 +443,20 @@ | |||||
| * | * | ||||
| * Path 10 (M at least N, but not much larger) | * Path 10 (M at least N, but not much larger) | ||||
| * | * | ||||
| CALL ZGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), | |||||
| $ DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_ZGEBRD=DUM(1) | |||||
| CALL ZGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1), | |||||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_ZGEBRD=CDUM(1) | |||||
| MAXWRK = 2*N + LWORK_ZGEBRD | MAXWRK = 2*N + LWORK_ZGEBRD | ||||
| IF( WNTUS .OR. WNTUO ) THEN | IF( WNTUS .OR. WNTUO ) THEN | ||||
| CALL ZUNGBR( 'Q', M, N, N, A, LDA, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_Q=DUM(1) | |||||
| CALL ZUNGBR( 'Q', M, N, N, A, LDA, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_Q=CDUM(1) | |||||
| MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q ) | MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q ) | ||||
| END IF | END IF | ||||
| IF( WNTUA ) THEN | IF( WNTUA ) THEN | ||||
| CALL ZUNGBR( 'Q', M, M, N, A, LDA, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_Q=DUM(1) | |||||
| CALL ZUNGBR( 'Q', M, M, N, A, LDA, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_Q=CDUM(1) | |||||
| MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q ) | MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q ) | ||||
| END IF | END IF | ||||
| IF( .NOT.WNTVN ) THEN | IF( .NOT.WNTVN ) THEN | ||||
| @@ -470,25 +470,26 @@ | |||||
| * | * | ||||
| MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 ) | MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 ) | ||||
| * Compute space needed for ZGELQF | * Compute space needed for ZGELQF | ||||
| CALL ZGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_ZGELQF=DUM(1) | |||||
| CALL ZGELQF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_ZGELQF=CDUM(1) | |||||
| * Compute space needed for ZUNGLQ | * Compute space needed for ZUNGLQ | ||||
| CALL ZUNGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_ZUNGLQ_N=DUM(1) | |||||
| CALL ZUNGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_ZUNGLQ_M=DUM(1) | |||||
| CALL ZUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), -1, | |||||
| $ IERR ) | |||||
| LWORK_ZUNGLQ_N=CDUM(1) | |||||
| CALL ZUNGLQ( M, N, M, A, LDA, CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_ZUNGLQ_M=CDUM(1) | |||||
| * Compute space needed for ZGEBRD | * Compute space needed for ZGEBRD | ||||
| CALL ZGEBRD( M, M, A, LDA, S, DUM(1), DUM(1), | |||||
| $ DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_ZGEBRD=DUM(1) | |||||
| CALL ZGEBRD( M, M, A, LDA, S, DUM(1), CDUM(1), | |||||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_ZGEBRD=CDUM(1) | |||||
| * Compute space needed for ZUNGBR P | * Compute space needed for ZUNGBR P | ||||
| CALL ZUNGBR( 'P', M, M, M, A, N, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_P=DUM(1) | |||||
| CALL ZUNGBR( 'P', M, M, M, A, N, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_P=CDUM(1) | |||||
| * Compute space needed for ZUNGBR Q | * Compute space needed for ZUNGBR Q | ||||
| CALL ZUNGBR( 'Q', M, M, M, A, N, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_Q=DUM(1) | |||||
| CALL ZUNGBR( 'Q', M, M, M, A, N, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_Q=CDUM(1) | |||||
| IF( N.GE.MNTHR ) THEN | IF( N.GE.MNTHR ) THEN | ||||
| IF( WNTVN ) THEN | IF( WNTVN ) THEN | ||||
| * | * | ||||
| @@ -592,21 +593,21 @@ | |||||
| * | * | ||||
| * Path 10t(N greater than M, but not much larger) | * Path 10t(N greater than M, but not much larger) | ||||
| * | * | ||||
| CALL ZGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), | |||||
| $ DUM(1), DUM(1), -1, IERR ) | |||||
| LWORK_ZGEBRD=DUM(1) | |||||
| CALL ZGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1), | |||||
| $ CDUM(1), CDUM(1), -1, IERR ) | |||||
| LWORK_ZGEBRD=CDUM(1) | |||||
| MAXWRK = 2*M + LWORK_ZGEBRD | MAXWRK = 2*M + LWORK_ZGEBRD | ||||
| IF( WNTVS .OR. WNTVO ) THEN | IF( WNTVS .OR. WNTVO ) THEN | ||||
| * Compute space needed for ZUNGBR P | * Compute space needed for ZUNGBR P | ||||
| CALL ZUNGBR( 'P', M, N, M, A, N, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_P=DUM(1) | |||||
| CALL ZUNGBR( 'P', M, N, M, A, N, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_P=CDUM(1) | |||||
| MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P ) | MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P ) | ||||
| END IF | END IF | ||||
| IF( WNTVA ) THEN | IF( WNTVA ) THEN | ||||
| CALL ZUNGBR( 'P', N, N, M, A, N, DUM(1), | |||||
| $ DUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_P=DUM(1) | |||||
| CALL ZUNGBR( 'P', N, N, M, A, N, CDUM(1), | |||||
| $ CDUM(1), -1, IERR ) | |||||
| LWORK_ZUNGBR_P=CDUM(1) | |||||
| MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P ) | MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P ) | ||||
| END IF | END IF | ||||
| IF( .NOT.WNTUN ) THEN | IF( .NOT.WNTUN ) THEN | ||||
| @@ -286,7 +286,7 @@ | |||||
| ZLANHF = ZERO | ZLANHF = ZERO | ||||
| RETURN | RETURN | ||||
| ELSE IF( N.EQ.1 ) THEN | ELSE IF( N.EQ.1 ) THEN | ||||
| ZLANHF = ABS( A(0) ) | |||||
| ZLANHF = ABS(DBLE(A(0))) | |||||
| RETURN | RETURN | ||||
| END IF | END IF | ||||
| * | * | ||||
| @@ -526,10 +526,10 @@ | |||||
| IF (SN.NE.ZERO) THEN | IF (SN.NE.ZERO) THEN | ||||
| IF (CS.NE.ZERO) THEN | IF (CS.NE.ZERO) THEN | ||||
| ISUPPZ(2*M-1) = 1 | ISUPPZ(2*M-1) = 1 | ||||
| ISUPPZ(2*M-1) = 2 | |||||
| ISUPPZ(2*M) = 2 | |||||
| ELSE | ELSE | ||||
| ISUPPZ(2*M-1) = 1 | ISUPPZ(2*M-1) = 1 | ||||
| ISUPPZ(2*M-1) = 1 | |||||
| ISUPPZ(2*M) = 1 | |||||
| END IF | END IF | ||||
| ELSE | ELSE | ||||
| ISUPPZ(2*M-1) = 2 | ISUPPZ(2*M-1) = 2 | ||||
| @@ -550,10 +550,10 @@ | |||||
| IF (SN.NE.ZERO) THEN | IF (SN.NE.ZERO) THEN | ||||
| IF (CS.NE.ZERO) THEN | IF (CS.NE.ZERO) THEN | ||||
| ISUPPZ(2*M-1) = 1 | ISUPPZ(2*M-1) = 1 | ||||
| ISUPPZ(2*M-1) = 2 | |||||
| ISUPPZ(2*M) = 2 | |||||
| ELSE | ELSE | ||||
| ISUPPZ(2*M-1) = 1 | ISUPPZ(2*M-1) = 1 | ||||
| ISUPPZ(2*M-1) = 1 | |||||
| ISUPPZ(2*M) = 1 | |||||
| END IF | END IF | ||||
| ELSE | ELSE | ||||
| ISUPPZ(2*M-1) = 2 | ISUPPZ(2*M-1) = 2 | ||||
| @@ -1,8 +1,8 @@ | |||||
| Data file for testing DSGESV/DSPOSV LAPACK routines | Data file for testing DSGESV/DSPOSV LAPACK routines | ||||
| 11 Number of values of M | |||||
| 0 1 2 13 17 45 78 91 101 120 132 Values of M (row dimension) | |||||
| 12 Number of values of M | |||||
| 0 1 2 13 17 45 78 91 101 119 120 132 values of M (row dimension) | |||||
| 4 Number of values of NRHS | 4 Number of values of NRHS | ||||
| 1 2 15 16 Values of NRHS (number of right hand sides) | |||||
| 1 2 14 16 Values of NRHS (number of right hand sides) | |||||
| 30.0 Threshold value of test ratio | 30.0 Threshold value of test ratio | ||||
| T Put T to test the driver routine | T Put T to test the driver routine | ||||
| T Put T to test the error exits | T Put T to test the error exits | ||||