Browse Source

Merge branch 'develop'

tags/v0.2.10
Zhang Xianyi 12 years ago
parent
commit
21b5347fbe
32 changed files with 2654 additions and 514 deletions
  1. +20
    -0
      Changelog.txt
  2. +15
    -1
      Makefile.rule
  3. +12
    -1
      Makefile.system
  4. +623
    -91
      benchmark/Makefile
  5. +210
    -0
      benchmark/gemm.c
  6. +192
    -0
      benchmark/hemm.c
  7. +191
    -0
      benchmark/her2k.c
  8. +189
    -0
      benchmark/herk.c
  9. +203
    -0
      benchmark/symm.c
  10. +203
    -0
      benchmark/syr2k.c
  11. +199
    -0
      benchmark/syrk.c
  12. +202
    -0
      benchmark/trmm.c
  13. +202
    -0
      benchmark/trsm.c
  14. +16
    -0
      cpuid_x86.c
  15. +4
    -0
      driver/others/dynamic.c
  16. +19
    -18
      interface/Makefile
  17. +4
    -2
      kernel/arm/KERNEL.ARMV6
  18. +4
    -2
      kernel/arm/KERNEL.ARMV7
  19. +35
    -275
      kernel/x86/KERNEL
  20. +0
    -3
      kernel/x86_64/KERNEL.BARCELONA
  21. +0
    -2
      kernel/x86_64/KERNEL.BULLDOZER
  22. +0
    -3
      kernel/x86_64/KERNEL.HASWELL
  23. +0
    -3
      kernel/x86_64/KERNEL.NEHALEM
  24. +0
    -2
      kernel/x86_64/KERNEL.PILEDRIVER
  25. +0
    -2
      kernel/x86_64/KERNEL.SANDYBRIDGE
  26. +4
    -4
      lapack-netlib/SRC/Makefile
  27. +49
    -48
      lapack-netlib/SRC/cgesvd.f
  28. +1
    -1
      lapack-netlib/SRC/clanhf.f
  29. +49
    -48
      lapack-netlib/SRC/zgesvd.f
  30. +1
    -1
      lapack-netlib/SRC/zlanhf.f
  31. +4
    -4
      lapack-netlib/SRC/zstemr.f
  32. +3
    -3
      lapack-netlib/TESTING/dstest.in

+ 20
- 0
Changelog.txt View File

@@ -1,4 +1,24 @@
OpenBLAS ChangeLog
====================================================================
Version 0.2.10
16-Jul-2014
common:
* Added the following BLAS extensions:
s/d/c/zaxpby, s/d/c/zimatcopy, s/d/c/zomatcopy.
* Added OPENBLAS_CORETYPE environment variable for dynamic_arch. (a86d34)
* Added NO_AVX2 flag for old binutils. (#401)
* Support outputting the CPU core name at runtime. (#407)
* Patched LAPACK to fix bugs 114, 117, and 118.
(http://www.netlib.org/lapack/bug_list.html)
* Disabled ?gemm3m for a work-around fix. (#400)
x86/x86-64:
* Fixed many bugs in the optimized kernels for Sandy Bridge, Haswell,
Bulldozer, and Piledriver.
https://github.com/xianyi/OpenBLAS/wiki/Fixed-optimized-kernels-To-do-List

ARM:
* Improved LAPACK testing.

====================================================================
Version 0.2.9
10-Jun-2014


+ 15
- 1
Makefile.rule View File

@@ -3,7 +3,7 @@
#

# This library's version
VERSION = 0.2.10.rc2
VERSION = 0.2.10

# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@@ -25,9 +25,20 @@ VERSION = 0.2.10.rc2
# FC = gfortran

# You can even specify a cross compiler. In that case, please also set HOSTCC.

# cross compiler for Windows
# CC = x86_64-w64-mingw32-gcc
# FC = x86_64-w64-mingw32-gfortran

# cross compiler for 32bit ARM
# CC = arm-linux-gnueabihf-gcc
# FC = arm-linux-gnueabihf-gfortran

# cross compiler for 64bit ARM
# CC = aarch64-linux-gnu-gcc
# FC = aarch64-linux-gnu-gfortran


# If you use a cross compiler, please set this host compiler.
# HOSTCC = gcc

@@ -88,6 +99,9 @@ NO_AFFINITY = 1
# and OS. However, the performance is low.
# NO_AVX = 1

# Don't use Haswell optimizations if binutils is too old (e.g. RHEL6)
# NO_AVX2 = 1

# Don't use parallel make.
# NO_PARALLEL_MAKE = 1



+ 12
- 1
Makefile.system View File

@@ -109,6 +109,10 @@ ifeq ($(BINARY), 32)
GETARCH_FLAGS += -DNO_AVX
endif

ifeq ($(NO_AVX2), 1)
GETARCH_FLAGS += -DNO_AVX2
endif

ifeq ($(DEBUG), 1)
GETARCH_FLAGS += -g
endif
@@ -385,7 +389,10 @@ endif
ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
endif
ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += HASWELL
endif
endif

@@ -777,6 +784,10 @@ ifeq ($(BINARY), 32)
CCOMMON_OPT += -DNO_AVX
endif

ifeq ($(NO_AVX2), 1)
CCOMMON_OPT += -DNO_AVX2
endif

ifdef SMP
CCOMMON_OPT += -DSMP_SERVER



+ 623
- 91
benchmark/Makefile View File

@@ -1,157 +1,607 @@
TOPDIR = ..
include $(TOPDIR)/Makefile.system

CULA_INC = -I/usr/local/cula/include
CULA_LIB = -L/usr/local/cula/lib64 -Wl,-rpath,/usr/local/cula/lib64 -lcula_fortran -lcula -lcublas

all :: dlinpack.goto dlinpack.mkl dlinpack.acml dcholesky.goto dcholesky.mkl dcholesky.acml
./dlinpack.goto 4000 4000 1
-./dlinpack.mkl 4000 4000 1
-./dlinpack.acml 4000 4000 1
./dcholesky.goto 4000 4000 1
-./dcholesky.mkl 4000 4000 1
-./dcholesky.acml 4000 4000 1

# ACML standard
ACML=/opt/acml5.3.1/gfortran64_mp/lib
LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm

# ACML custom
#ACML=/opt/pb/acml-5-3-1-gfortran-64bit/gfortran64_fma4_mp/lib
#LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm

# Atlas Ubuntu
#ATLAS=/usr/lib/atlas-base
#LIBATLAS = -fopenmp $(ATLAS)/liblapack_atlas.a $(ATLAS)/libptcblas.a $(ATLAS)/libptf77blas.a $(ATLAS)/libatlas.a -lgfortran -lm

# Atlas RHEL and Fedora
ATLAS=/usr/lib64/atlas
LIBATLAS = -fopenmp $(ATLAS)/liblapack.a $(ATLAS)/libptcblas.a $(ATLAS)/libptf77blas.a $(ATLAS)/libatlas.a -lgfortran -lm

# Intel standard
MKL=/opt/intel/mkl/lib/intel64
LIBMKL = -L$(MKL) -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm

# Intel custom
#MKL=/home/saar/intel_mkl
#LIBMKL = -L$(MKL) -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm



goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \
sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \
strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \
ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
chemm.goto zhemm.goto \
cherk.goto zherk.goto \
cher2k.goto zher2k.goto \
ssymm.goto dsymm.goto csymm.goto zsymm.goto

acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
sgemm.acml dgemm.acml cgemm.acml zgemm.acml \
strmm.acml dtrmm.acml ctrmm.acml ztrmm.acml \
strsm.acml dtrsm.acml ctrsm.acml ztrsm.acml \
ssyrk.acml dsyrk.acml csyrk.acml zsyrk.acml \
ssyr2k.acml dsyr2k.acml csyr2k.acml zsyr2k.acml \
chemm.acml zhemm.acml \
cherk.acml zherk.acml \
cher2k.acml zher2k.acml \
ssymm.acml dsymm.acml csymm.acml zsymm.acml

atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
scholesky.atlas dcholesky.atlas ccholesky.atlas zcholesky.atlas \
sgemm.atlas dgemm.atlas cgemm.atlas zgemm.atlas \
strmm.atlas dtrmm.atlas ctrmm.atlas ztrmm.atlas \
strsm.atlas dtrsm.atlas ctrsm.atlas ztrsm.atlas \
ssyrk.atlas dsyrk.atlas csyrk.atlas zsyrk.atlas \
ssyr2k.atlas dsyr2k.atlas csyr2k.atlas zsyr2k.atlas \
chemm.atlas zhemm.atlas \
cherk.atlas zherk.atlas \
cher2k.atlas zher2k.atlas \
ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas

mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
scholesky.mkl dcholesky.mkl ccholesky.mkl zcholesky.mkl \
sgemm.mkl dgemm.mkl cgemm.mkl zgemm.mkl \
strmm.mkl dtrmm.mkl ctrmm.mkl ztrmm.mkl \
strsm.mkl dtrsm.mkl ctrsm.mkl ztrsm.mkl \
ssyrk.mkl dsyrk.mkl csyrk.mkl zsyrk.mkl \
ssyr2k.mkl dsyr2k.mkl csyr2k.mkl zsyr2k.mkl \
chemm.mkl zhemm.mkl \
cherk.mkl zherk.mkl \
cher2k.mkl zher2k.mkl \
ssymm.mkl dsymm.mkl csymm.mkl zsymm.mkl

all :: goto atlas acml mkl

##################################### Slinpack ####################################################
slinpack.goto : slinpack.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

slinpack.acml : slinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

slinpack.atlas : slinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

slinpack.mkl : slinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Dlinpack ####################################################
dlinpack.goto : dlinpack.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

qlinpack.goto : qlinpack.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm
dlinpack.acml : dlinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dlinpack.atlas : dlinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dlinpack.mkl : dlinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Clinpack ####################################################

clinpack.goto : clinpack.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

clinpack.acml : clinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

clinpack.atlas : clinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

clinpack.mkl : clinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Zlinpack ####################################################

zlinpack.goto : zlinpack.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

xlinpack.goto : xlinpack.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm
zlinpack.acml : zlinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zlinpack.atlas : zlinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zlinpack.mkl : zlinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Scholesky ###################################################

scholesky.goto : scholesky.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

scholesky.acml : scholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

scholesky.atlas : scholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

scholesky.mkl : scholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Dcholesky ###################################################

dcholesky.goto : dcholesky.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

qcholesky.goto : qcholesky.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm
dcholesky.acml : dcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dcholesky.atlas : dcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dcholesky.mkl : dcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Ccholesky ###################################################

ccholesky.goto : ccholesky.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

ccholesky.acml : ccholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ccholesky.atlas : ccholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ccholesky.mkl : ccholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zcholesky.goto : zcholesky.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

##################################### Zcholesky ###################################################

xcholesky.goto : xcholesky.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

slinpack.mkl : slinpack.$(SUFFIX)
-$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
zcholesky.acml : zcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dlinpack.mkl : dlinpack.$(SUFFIX)
-$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
zcholesky.atlas : zcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

clinpack.mkl : clinpack.$(SUFFIX)
-$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
zcholesky.mkl : zcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zlinpack.mkl : zlinpack.$(SUFFIX)
-$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

scholesky.mkl : scholesky.$(SUFFIX)
-$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Sgemm ####################################################
sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

dcholesky.mkl : dcholesky.$(SUFFIX)
-$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
sgemm.acml : sgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ccholesky.mkl : ccholesky.$(SUFFIX)
-$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
sgemm.atlas : sgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zcholesky.mkl : zcholesky.$(SUFFIX)
-$(CC) -static $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
sgemm.mkl : sgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

slinpack.acml : slinpack.$(SUFFIX)
##################################### Dgemm ####################################################
dgemm.goto : dgemm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

dgemm.acml : dgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dlinpack.acml : dlinpack.$(SUFFIX)
dgemm.atlas : dgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dgemm.mkl : dgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Cgemm ####################################################

cgemm.goto : cgemm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

cgemm.acml : cgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

clinpack.acml : clinpack.$(SUFFIX)
cgemm.atlas : cgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

cgemm.mkl : cgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Zgemm ####################################################

zgemm.goto : zgemm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

zgemm.acml : zgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zlinpack.acml : zlinpack.$(SUFFIX)
zgemm.atlas : zgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zgemm.mkl : zgemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Ssymm ####################################################
ssymm.goto : ssymm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

ssymm.acml : ssymm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

scholesky.acml : scholesky.$(SUFFIX)
ssymm.atlas : ssymm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ssymm.mkl : ssymm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Dsymm ####################################################
dsymm.goto : dsymm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

dsymm.acml : dsymm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dcholesky.acml : dcholesky.$(SUFFIX)
dsymm.atlas : dsymm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dsymm.mkl : dsymm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Csymm ####################################################

csymm.goto : csymm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

csymm.acml : csymm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ccholesky.acml : ccholesky.$(SUFFIX)
csymm.atlas : csymm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

csymm.mkl : csymm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Zsymm ####################################################

zsymm.goto : zsymm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

zsymm.acml : zsymm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zcholesky.acml : zcholesky.$(SUFFIX)
zsymm.atlas : zsymm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zsymm.mkl : zsymm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Strmm ####################################################
strmm.goto : strmm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

strmm.acml : strmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

strmm.atlas : strmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

strmm.mkl : strmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Dtrmm ####################################################
dtrmm.goto : dtrmm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

dtrmm.acml : dtrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dtrmm.atlas : dtrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dtrmm.mkl : dtrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Ctrmm ####################################################

ctrmm.goto : ctrmm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

ctrmm.acml : ctrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

slinpack.flame : slinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ctrmm.atlas : ctrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dlinpack.flame : dlinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ctrmm.mkl : ctrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

clinpack.flame : clinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Ztrmm ####################################################

zlinpack.flame : zlinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ztrmm.goto : ztrmm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

scholesky.flame : scholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ztrmm.acml : ztrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dcholesky.flame : dcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ztrmm.atlas : ztrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ccholesky.flame : ccholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ztrmm.mkl : ztrmm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zcholesky.flame : zcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBFLAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

slinpack.sun : slinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Strsm ####################################################
strsm.goto : strsm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

dlinpack.sun : dlinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
strsm.acml : strsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

clinpack.sun : clinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
strsm.atlas : strsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zlinpack.sun : zlinpack.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
strsm.mkl : strsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

scholesky.sun : scholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Dtrsm ####################################################
dtrsm.goto : dtrsm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

dcholesky.sun : dcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
dtrsm.acml : dtrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ccholesky.sun : ccholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
dtrsm.atlas : dtrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zcholesky.sun : zcholesky.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBSUNPERF) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
dtrsm.mkl : dtrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

slinpack.cula : slinpack.$(SUFFIX) cula_wrapper.$(SUFFIX)
$(CC) $(CFLAGS) -o $(@F) $^ $(CULA_LIB) ../$(LIBNAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
##################################### Ctrsm ####################################################

clinpack.cula : clinpack.$(SUFFIX) cula_wrapper.$(SUFFIX)
$(CC) $(CFLAGS) -o $(@F) $^ $(CULA_LIB) ../$(LIBNAME) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
ctrsm.goto : ctrsm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

ctrsm.acml : ctrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

cula_wrapper.$(SUFFIX) : cula_wrapper.c
$(CC) $(CFLAGS) -c $(CULA_INC) -o $(@F) $^
ctrsm.atlas : ctrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ctrsm.mkl : ctrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Ztrsm ####################################################

ztrsm.goto : ztrsm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

ztrsm.acml : ztrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ztrsm.atlas : ztrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ztrsm.mkl : ztrsm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Ssyrk ####################################################
ssyrk.goto : ssyrk.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

ssyrk.acml : ssyrk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ssyrk.atlas : ssyrk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ssyrk.mkl : ssyrk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Dsyrk ####################################################
dsyrk.goto : dsyrk.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

dsyrk.acml : dsyrk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dsyrk.atlas : dsyrk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dsyrk.mkl : dsyrk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Csyrk ####################################################

csyrk.goto : csyrk.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

csyrk.acml : csyrk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

csyrk.atlas : csyrk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

csyrk.mkl : csyrk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Zsyrk ####################################################

zsyrk.goto : zsyrk.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

zsyrk.acml : zsyrk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zsyrk.atlas : zsyrk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zsyrk.mkl : zsyrk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)


##################################### Ssyr2k ####################################################
ssyr2k.goto : ssyr2k.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

ssyr2k.acml : ssyr2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ssyr2k.atlas : ssyr2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ssyr2k.mkl : ssyr2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Dsyr2k ####################################################
dsyr2k.goto : dsyr2k.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

dsyr2k.acml : dsyr2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dsyr2k.atlas : dsyr2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dsyr2k.mkl : dsyr2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Csyr2k ####################################################

csyr2k.goto : csyr2k.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

csyr2k.acml : csyr2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

csyr2k.atlas : csyr2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

csyr2k.mkl : csyr2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Zsyr2k ####################################################

zsyr2k.goto : zsyr2k.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

zsyr2k.acml : zsyr2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zsyr2k.atlas : zsyr2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zsyr2k.mkl : zsyr2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Chemm ####################################################

chemm.goto : chemm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

chemm.acml : chemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

chemm.atlas : chemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

chemm.mkl : chemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Zhemm ####################################################

zhemm.goto : zhemm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

zhemm.acml : zhemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zhemm.atlas : zhemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zhemm.mkl : zhemm.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Cherk ####################################################

cherk.goto : cherk.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

cherk.acml : cherk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

cherk.atlas : cherk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

cherk.mkl : cherk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Zherk ####################################################

zherk.goto : zherk.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

zherk.acml : zherk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zherk.atlas : zherk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zherk.mkl : zherk.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Cher2k ####################################################

cher2k.goto : cher2k.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

cher2k.acml : cher2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

cher2k.atlas : cher2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

cher2k.mkl : cher2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

##################################### Zher2k ####################################################

zher2k.goto : zher2k.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm

zher2k.acml : zher2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zher2k.atlas : zher2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

zher2k.mkl : zher2k.$(SUFFIX)
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

###################################################################################################

slinpack.$(SUFFIX) : linpack.c
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
@@ -159,37 +609,119 @@ slinpack.$(SUFFIX) : linpack.c
# Per-precision objects built from the single source linpack.c. The object
# name prefix goes with a -D/-U combination of COMPLEX and DOUBLE/XDOUBLE:
#   d: -UCOMPLEX -DDOUBLE    q: -UCOMPLEX -DXDOUBLE
#   c: -DCOMPLEX -UDOUBLE    z: -DCOMPLEX -DDOUBLE    x: -DCOMPLEX -DXDOUBLE
# NOTE(review): XDOUBLE presumably selects an extended-precision build;
# confirm against linpack.c / cholesky.c.
dlinpack.$(SUFFIX) : linpack.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^

qlinpack.$(SUFFIX) : linpack.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DXDOUBLE -o $(@F) $^

clinpack.$(SUFFIX) : linpack.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^

zlinpack.$(SUFFIX) : linpack.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^

xlinpack.$(SUFFIX) : linpack.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DXDOUBLE -o $(@F) $^

# Same scheme for cholesky.c; the s variant undefines both COMPLEX and DOUBLE.
scholesky.$(SUFFIX) : cholesky.c
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^

dcholesky.$(SUFFIX) : cholesky.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^

qcholesky.$(SUFFIX) : cholesky.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DXDOUBLE -o $(@F) $^

ccholesky.$(SUFFIX) : cholesky.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^

zcholesky.$(SUFFIX) : cholesky.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^

xcholesky.$(SUFFIX) : cholesky.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DXDOUBLE -o $(@F) $^
# Object rules for the benchmarks that exist in four precisions. Each family
# compiles one C source four times; the object name prefix goes with the flags:
#   s: -UCOMPLEX -UDOUBLE    d: -UCOMPLEX -DDOUBLE
#   c: -DCOMPLEX -UDOUBLE    z: -DCOMPLEX -DDOUBLE
# $(@F) is the file part of the target; $^ is the source file.

# gemm.c
sgemm.$(SUFFIX) : gemm.c
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^

dgemm.$(SUFFIX) : gemm.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^

cgemm.$(SUFFIX) : gemm.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^

zgemm.$(SUFFIX) : gemm.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^

# symm.c
ssymm.$(SUFFIX) : symm.c
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^

dsymm.$(SUFFIX) : symm.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^

csymm.$(SUFFIX) : symm.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^

zsymm.$(SUFFIX) : symm.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^

# trmm.c
strmm.$(SUFFIX) : trmm.c
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^

dtrmm.$(SUFFIX) : trmm.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^

ctrmm.$(SUFFIX) : trmm.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^

ztrmm.$(SUFFIX) : trmm.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^

# trsm.c
strsm.$(SUFFIX) : trsm.c
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^

dtrsm.$(SUFFIX) : trsm.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^

ctrsm.$(SUFFIX) : trsm.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^

ztrsm.$(SUFFIX) : trsm.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^

# syrk.c
ssyrk.$(SUFFIX) : syrk.c
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^

dsyrk.$(SUFFIX) : syrk.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^

csyrk.$(SUFFIX) : syrk.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^

zsyrk.$(SUFFIX) : syrk.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^

# syr2k.c
ssyr2k.$(SUFFIX) : syr2k.c
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^

dsyr2k.$(SUFFIX) : syr2k.c
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^

csyr2k.$(SUFFIX) : syr2k.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^

zsyr2k.$(SUFFIX) : syr2k.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^

# Object rules for the Hermitian benchmarks. Only c and z objects are built,
# always with -DCOMPLEX; -UDOUBLE gives the c object and -DDOUBLE the z object.

# hemm.c
chemm.$(SUFFIX) : hemm.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^

zhemm.$(SUFFIX) : hemm.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^

# herk.c
cherk.$(SUFFIX) : herk.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^

zherk.$(SUFFIX) : herk.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^

# her2k.c
cher2k.$(SUFFIX) : her2k.c
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^

zher2k.$(SUFFIX) : her2k.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^




# Remove the benchmark executables, identified by their library suffix.
# '::' makes this an independent double-colon rule, so other 'clean ::' rules
# for the same target keep their own recipes. '@' hides the command echo and
# 'rm -f' does not fail on files that are absent.
# NOTE(review): the two rm lines differ only in their trailing patterns
# (*.sun *.cula versus *.atlas); in this diff view they may be the old and new
# form of a single line. Confirm against the real Makefile.
clean ::
@rm -f *.goto *.mkl *.acml *.sun *.cula
@rm -f *.goto *.mkl *.acml *.atlas

# Pull in the rules from $(TOPDIR)/Makefile.tail.
include $(TOPDIR)/Makefile.tail


+ 210
- 0
benchmark/gemm.c View File

@@ -0,0 +1,210 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"


/*
 * Bind GEMM to the BLAS entry point matching the build flags:
 *   COMPLEX and DOUBLE -> zgemm
 *   COMPLEX only       -> cgemm
 *   DOUBLE only        -> dgemm
 *   neither            -> sgemm
 * Any earlier definition of GEMM is discarded first.
 */
#undef GEMM

#if defined(COMPLEX) && defined(DOUBLE)
#define GEMM BLASFUNC(zgemm)
#elif defined(COMPLEX)
#define GEMM BLASFUNC(cgemm)
#elif defined(DOUBLE)
#define GEMM BLASFUNC(dgemm)
#else
#define GEMM BLASFUNC(sgemm)
#endif

#if defined(__WIN32__) || defined(__WIN64__)

/* Offset subtracted below to rebase a FILETIME value (already converted to
   microseconds) onto the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives the current system time as seconds and microseconds relative
 *      to the Unix epoch; nothing is written when tv is NULL.
 * tz : accepted for signature compatibility only and ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz;   /* time-zone information is not provided */

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Reassemble the 64-bit tick count from its two 32-bit halves. */
  tmpres   = ft.dwHighDateTime;
  tmpres <<= 32;
  tmpres  |= ft.dwLowDateTime;

  /* converting file time to unix epoch */
  tmpres /= 10;                         /* convert into microseconds */
  tmpres -= DELTA_EPOCH_IN_MICROSECS;

  tv->tv_sec  = (long)(tmpres / 1000000UL);
  tv->tv_usec = (long)(tmpres % 1000000UL);

  return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocate memory from a System V shared-memory segment created with
 * SHM_HUGETLB. The whole section is compiled out by the trailing "&& 0" in
 * the condition above; when enabled, the #define at the bottom routes every
 * malloc() call that follows in this file through this function.
 *
 * size : number of bytes requested. The segment size is
 *        (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), i.e. rounded to a
 *        multiple of HUGE_PAGESIZE (assuming it is a power of two).
 *
 * Returns the attached address. Prints a message and exits with status 1 if
 * the segment cannot be created or attached.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1 */
  if (address == (void *)-1){
    /* Remove the segment before exiting: System V segments are not released
       automatically when the creating process terminates. */
    shmctl(shmid, IPC_RMID, 0);
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal right away; it is destroyed once the last
     attachment goes away. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif

/*
 * Benchmark driver: times GEMM on square m x m operands and reports MFlops.
 *
 * Positional arguments (all optional): from to step
 *   from : first matrix order (default 1)
 *   to   : last matrix order (default 200); when given it is raised to
 *          `from` if smaller
 *   step : increment between orders (default 1); values below 1 are
 *          treated as 1 because they would never end the size loop
 * Environment:
 *   OPENBLAS_LOOPS : number of timed repetitions averaged per size
 *                    (default 1); values below 1 are treated as 1
 *
 * Output goes to stderr, one line per size. Returns 0; exits with status 1
 * when a buffer cannot be allocated.
 */
int MAIN__(int argc, char *argv[]){

  FLOAT *a, *b, *c;
  FLOAT alpha[] = {1.0, 1.0};
  FLOAT beta [] = {1.0, 1.0};
  char trans='N';
  blasint m, i, j;
  int loops = 1;
  int l;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;

  struct timeval start, stop;
  double time1,timeg;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  /* A zero or negative step would never move m past `to`. */
  if (step < 1) step = 1;

  fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);

  /* All three buffers are sized once for the largest order. */
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  p = getenv("OPENBLAS_LOOPS");
  if ( p != NULL )
    loops = atoi(p);

  /* With fewer than one repetition nothing is timed and the average below
     would divide by zero or a negative count. */
  if (loops < 1) loops = 1;

#ifdef linux
  /* The matrices are filled with rand(), whose seeding function is srand();
     srandom() only affects rand() on C libraries where both share state. */
  srand(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    timeg=0;

    fprintf(stderr, " %6d : ", (int)m);

    for (l=0; l<loops; l++)
    {
      /* Fresh operands in [-0.5, 0.5] for every repetition. */
      for(j = 0; j < m; j++){
        for(i = 0; i < m * COMPSIZE; i++){
          a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
          b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
          c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        }
      }

      /* Only the BLAS call itself is inside the timed region. */
      gettimeofday( &start, (struct timezone *)0);

      GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );

      gettimeofday( &stop, (struct timezone *)0);

      time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

      timeg += time1;

    }

    /* Average time per call over all repetitions. */
    timeg /= loops;
    fprintf(stderr,
            " %10.2f MFlops\n",
            COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6);

  }

  return 0;
}

/* Expose MAIN__ as a weak `main`. Declared with an int return type so the
   alias has the same type as MAIN__. */
int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 192
- 0
benchmark/hemm.c View File

@@ -0,0 +1,192 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"


/*
 * Bind HEMM to the BLAS entry point matching the build flags:
 * zhemm when DOUBLE is defined, chemm otherwise.
 * Any earlier definition of HEMM is discarded first.
 */
#undef HEMM

#if !defined(DOUBLE)
#define HEMM BLASFUNC(chemm)
#else
#define HEMM BLASFUNC(zhemm)
#endif


#if defined(__WIN32__) || defined(__WIN64__)

/* Offset subtracted below to rebase a FILETIME value (already converted to
   microseconds) onto the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives the current system time as seconds and microseconds relative
 *      to the Unix epoch; nothing is written when tv is NULL.
 * tz : accepted for signature compatibility only and ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz;   /* time-zone information is not provided */

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Reassemble the 64-bit tick count from its two 32-bit halves. */
  tmpres   = ft.dwHighDateTime;
  tmpres <<= 32;
  tmpres  |= ft.dwLowDateTime;

  /* converting file time to unix epoch */
  tmpres /= 10;                         /* convert into microseconds */
  tmpres -= DELTA_EPOCH_IN_MICROSECS;

  tv->tv_sec  = (long)(tmpres / 1000000UL);
  tv->tv_usec = (long)(tmpres % 1000000UL);

  return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocate memory from a System V shared-memory segment created with
 * SHM_HUGETLB. The whole section is compiled out by the trailing "&& 0" in
 * the condition above; when enabled, the #define at the bottom routes every
 * malloc() call that follows in this file through this function.
 *
 * size : number of bytes requested. The segment size is
 *        (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), i.e. rounded to a
 *        multiple of HUGE_PAGESIZE (assuming it is a power of two).
 *
 * Returns the attached address. Prints a message and exits with status 1 if
 * the segment cannot be created or attached.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1 */
  if (address == (void *)-1){
    /* Remove the segment before exiting: System V segments are not released
       automatically when the creating process terminates. */
    shmctl(shmid, IPC_RMID, 0);
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal right away; it is destroyed once the last
     attachment goes away. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif

/*
 * Benchmark driver: times HEMM on square m x m operands and reports MFlops.
 *
 * Positional arguments (all optional): from to step
 *   from : first matrix order (default 1)
 *   to   : last matrix order (default 200); when given it is raised to
 *          `from` if smaller
 *   step : increment between orders (default 1); values below 1 are
 *          treated as 1 because they would never end the size loop
 * Environment:
 *   OPENBLAS_SIDE : first character is passed as the side argument (default 'L')
 *   OPENBLAS_UPLO : first character is passed as the uplo argument (default 'U')
 *
 * Output goes to stderr, one line per size. Returns 0; exits with status 1
 * when a buffer cannot be allocated.
 */
int MAIN__(int argc, char *argv[]){

  FLOAT *a, *b, *c;
  FLOAT alpha[] = {1.0, 1.0};
  FLOAT beta [] = {1.0, 1.0};
  char *p;

  char side='L';
  char uplo='U';

  blasint m, i, j;

  int from = 1;
  int to = 200;
  int step = 1;

  struct timeval start, stop;
  double time1;

  if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  /* A zero or negative step would never move m past `to`. */
  if (step < 1) step = 1;

  fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c\n", from, to, step,side,uplo);

  /* All three buffers are sized once for the largest order. */
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef linux
  /* The matrices are filled with rand(), whose seeding function is srand();
     srandom() only affects rand() on C libraries where both share state. */
  srand(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    fprintf(stderr, " %6d : ", (int)m);

    /* Fresh operands in [-0.5, 0.5] for every size. */
    for(j = 0; j < m; j++){
      for(i = 0; i < m * COMPSIZE; i++){
        a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }

    /* Only the BLAS call itself is inside the timed region. */
    gettimeofday( &start, (struct timezone *)0);

    HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );

    gettimeofday( &stop, (struct timezone *)0);

    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

    fprintf(stderr,
            " %10.2f MFlops\n",
            COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);

  }

  return 0;
}

/* Expose MAIN__ as a weak `main`. Declared with an int return type so the
   alias has the same type as MAIN__. */
int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 191
- 0
benchmark/her2k.c View File

@@ -0,0 +1,191 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"


/*
 * Bind HER2K to the BLAS entry point matching the build flags:
 * zher2k when DOUBLE is defined, cher2k otherwise.
 * Any earlier definition of HER2K is discarded first.
 */
#undef HER2K
#if !defined(DOUBLE)
#define HER2K BLASFUNC(cher2k)
#else
#define HER2K BLASFUNC(zher2k)
#endif


#if defined(__WIN32__) || defined(__WIN64__)

/* Offset subtracted below to rebase a FILETIME value (already converted to
   microseconds) onto the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives the current system time as seconds and microseconds relative
 *      to the Unix epoch; nothing is written when tv is NULL.
 * tz : accepted for signature compatibility only and ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz;   /* time-zone information is not provided */

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Reassemble the 64-bit tick count from its two 32-bit halves. */
  tmpres   = ft.dwHighDateTime;
  tmpres <<= 32;
  tmpres  |= ft.dwLowDateTime;

  /* converting file time to unix epoch */
  tmpres /= 10;                         /* convert into microseconds */
  tmpres -= DELTA_EPOCH_IN_MICROSECS;

  tv->tv_sec  = (long)(tmpres / 1000000UL);
  tv->tv_usec = (long)(tmpres % 1000000UL);

  return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocate memory from a System V shared-memory segment created with
 * SHM_HUGETLB. The whole section is compiled out by the trailing "&& 0" in
 * the condition above; when enabled, the #define at the bottom routes every
 * malloc() call that follows in this file through this function.
 *
 * size : number of bytes requested. The segment size is
 *        (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), i.e. rounded to a
 *        multiple of HUGE_PAGESIZE (assuming it is a power of two).
 *
 * Returns the attached address. Prints a message and exits with status 1 if
 * the segment cannot be created or attached.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1 */
  if (address == (void *)-1){
    /* Remove the segment before exiting: System V segments are not released
       automatically when the creating process terminates. */
    shmctl(shmid, IPC_RMID, 0);
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal right away; it is destroyed once the last
     attachment goes away. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif

/*
 * Benchmark driver: times HER2K on square m x m operands and reports MFlops.
 *
 * Positional arguments (all optional): from to step
 *   from : first matrix order (default 1)
 *   to   : last matrix order (default 200); when given it is raised to
 *          `from` if smaller
 *   step : increment between orders (default 1); values below 1 are
 *          treated as 1 because they would never end the size loop
 * Environment:
 *   OPENBLAS_UPLO  : first character is passed as the uplo argument (default 'U')
 *   OPENBLAS_TRANS : first character is passed as the trans argument (default 'N')
 *
 * Output goes to stderr, one line per size. Returns 0; exits with status 1
 * when a buffer cannot be allocated.
 */
int MAIN__(int argc, char *argv[]){

  FLOAT *a, *b, *c;
  FLOAT alpha[] = {1.0, 1.0};
  FLOAT beta [] = {1.0, 1.0};
  char *p;

  char uplo='U';
  char trans='N';

  blasint m, i, j;

  int from = 1;
  int to = 200;
  int step = 1;

  struct timeval start, stop;
  double time1;

  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
  if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  /* A zero or negative step would never move m past `to`. */
  if (step < 1) step = 1;

  fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);

  /* All three buffers are sized once for the largest order. */
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef linux
  /* The matrices are filled with rand(), whose seeding function is srand();
     srandom() only affects rand() on C libraries where both share state. */
  srand(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    fprintf(stderr, " %6d : ", (int)m);

    /* Fresh operands in [-0.5, 0.5] for every size. */
    for(j = 0; j < m; j++){
      for(i = 0; i < m * COMPSIZE; i++){
        a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }

    /* Only the BLAS call itself is inside the timed region. */
    gettimeofday( &start, (struct timezone *)0);

    HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );

    gettimeofday( &stop, (struct timezone *)0);

    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

    fprintf(stderr,
            " %10.2f MFlops\n",
            COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);

  }

  return 0;
}

/* Expose MAIN__ as a weak `main`. Declared with an int return type so the
   alias has the same type as MAIN__. */
int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 189
- 0
benchmark/herk.c View File

@@ -0,0 +1,189 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"


/*
 * Bind HERK to the BLAS entry point matching the build flags:
 * zherk when DOUBLE is defined, cherk otherwise.
 * Any earlier definition of HERK is discarded first.
 */
#undef HERK

#if !defined(DOUBLE)
#define HERK BLASFUNC(cherk)
#else
#define HERK BLASFUNC(zherk)
#endif


#if defined(__WIN32__) || defined(__WIN64__)

/* Offset subtracted below to rebase a FILETIME value (already converted to
   microseconds) onto the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives the current system time as seconds and microseconds relative
 *      to the Unix epoch; nothing is written when tv is NULL.
 * tz : accepted for signature compatibility only and ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz;   /* time-zone information is not provided */

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Reassemble the 64-bit tick count from its two 32-bit halves. */
  tmpres   = ft.dwHighDateTime;
  tmpres <<= 32;
  tmpres  |= ft.dwLowDateTime;

  /* converting file time to unix epoch */
  tmpres /= 10;                         /* convert into microseconds */
  tmpres -= DELTA_EPOCH_IN_MICROSECS;

  tv->tv_sec  = (long)(tmpres / 1000000UL);
  tv->tv_usec = (long)(tmpres % 1000000UL);

  return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocate memory from a System V shared-memory segment created with
 * SHM_HUGETLB. The whole section is compiled out by the trailing "&& 0" in
 * the condition above; when enabled, the #define at the bottom routes every
 * malloc() call that follows in this file through this function.
 *
 * size : number of bytes requested. The segment size is
 *        (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), i.e. rounded to a
 *        multiple of HUGE_PAGESIZE (assuming it is a power of two).
 *
 * Returns the attached address. Prints a message and exits with status 1 if
 * the segment cannot be created or attached.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1 */
  if (address == (void *)-1){
    /* Remove the segment before exiting: System V segments are not released
       automatically when the creating process terminates. */
    shmctl(shmid, IPC_RMID, 0);
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal right away; it is destroyed once the last
     attachment goes away. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif

/*
 * Benchmark driver: times HERK on square m x m operands and reports MFlops.
 *
 * Positional arguments (all optional): from to step
 *   from : first matrix order (default 1)
 *   to   : last matrix order (default 200); when given it is raised to
 *          `from` if smaller
 *   step : increment between orders (default 1); values below 1 are
 *          treated as 1 because they would never end the size loop
 * Environment:
 *   OPENBLAS_UPLO  : first character is passed as the uplo argument (default 'U')
 *   OPENBLAS_TRANS : first character is passed as the trans argument (default 'N')
 *
 * Output goes to stderr, one line per size. Returns 0; exits with status 1
 * when a buffer cannot be allocated.
 */
int MAIN__(int argc, char *argv[]){

  FLOAT *a, *c;
  FLOAT alpha[] = {1.0, 1.0};
  FLOAT beta [] = {1.0, 1.0};
  char *p;

  char uplo='U';
  char trans='N';

  blasint m, i, j;

  int from = 1;
  int to = 200;
  int step = 1;

  struct timeval start, stop;
  double time1;

  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
  if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  /* A zero or negative step would never move m past `to`. */
  if (step < 1) step = 1;

  fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);

  /* Both buffers are sized once for the largest order. */
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef linux
  /* The matrices are filled with rand(), whose seeding function is srand();
     srandom() only affects rand() on C libraries where both share state. */
  srand(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    fprintf(stderr, " %6d : ", (int)m);

    /* Fresh operands in [-0.5, 0.5] for every size. */
    for(j = 0; j < m; j++){
      for(i = 0; i < m * COMPSIZE; i++){
        a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }

    /* Only the BLAS call itself is inside the timed region. */
    gettimeofday( &start, (struct timezone *)0);

    HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );

    gettimeofday( &stop, (struct timezone *)0);

    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

    fprintf(stderr,
            " %10.2f MFlops\n",
            COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);

  }

  return 0;
}

/* Expose MAIN__ as a weak `main`. Declared with an int return type so the
   alias has the same type as MAIN__. */
int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 203
- 0
benchmark/symm.c View File

@@ -0,0 +1,203 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"


/*
 * Bind SYMM to the BLAS entry point matching the build flags:
 *   COMPLEX and DOUBLE -> zsymm
 *   COMPLEX only       -> csymm
 *   DOUBLE only        -> dsymm
 *   neither            -> ssymm
 * Any earlier definition of SYMM is discarded first.
 */
#undef SYMM

#if defined(COMPLEX) && defined(DOUBLE)
#define SYMM BLASFUNC(zsymm)
#elif defined(COMPLEX)
#define SYMM BLASFUNC(csymm)
#elif defined(DOUBLE)
#define SYMM BLASFUNC(dsymm)
#else
#define SYMM BLASFUNC(ssymm)
#endif

#if defined(__WIN32__) || defined(__WIN64__)

/* Offset subtracted below to rebase a FILETIME value (already converted to
   microseconds) onto the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives the current system time as seconds and microseconds relative
 *      to the Unix epoch; nothing is written when tv is NULL.
 * tz : accepted for signature compatibility only and ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz;   /* time-zone information is not provided */

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Reassemble the 64-bit tick count from its two 32-bit halves. */
  tmpres   = ft.dwHighDateTime;
  tmpres <<= 32;
  tmpres  |= ft.dwLowDateTime;

  /* converting file time to unix epoch */
  tmpres /= 10;                         /* convert into microseconds */
  tmpres -= DELTA_EPOCH_IN_MICROSECS;

  tv->tv_sec  = (long)(tmpres / 1000000UL);
  tv->tv_usec = (long)(tmpres % 1000000UL);

  return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocate memory from a System V shared-memory segment created with
 * SHM_HUGETLB. The whole section is compiled out by the trailing "&& 0" in
 * the condition above; when enabled, the #define at the bottom routes every
 * malloc() call that follows in this file through this function.
 *
 * size : number of bytes requested. The segment size is
 *        (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), i.e. rounded to a
 *        multiple of HUGE_PAGESIZE (assuming it is a power of two).
 *
 * Returns the attached address. Prints a message and exits with status 1 if
 * the segment cannot be created or attached.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1 */
  if (address == (void *)-1){
    /* Remove the segment before exiting: System V segments are not released
       automatically when the creating process terminates. */
    shmctl(shmid, IPC_RMID, 0);
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal right away; it is destroyed once the last
     attachment goes away. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif

/*
 * Benchmark driver: times SYMM on square m x m operands and reports MFlops.
 *
 * Positional arguments (all optional): from to step
 *   from : first matrix order (default 1)
 *   to   : last matrix order (default 200); when given it is raised to
 *          `from` if smaller
 *   step : increment between orders (default 1); values below 1 are
 *          treated as 1 because they would never end the size loop
 * Environment:
 *   OPENBLAS_SIDE : first character is passed as the side argument (default 'L')
 *   OPENBLAS_UPLO : first character is passed as the uplo argument (default 'U')
 *
 * Output goes to stderr, one line per size. Returns 0; exits with status 1
 * when a buffer cannot be allocated.
 */
int MAIN__(int argc, char *argv[]){

  FLOAT *a, *b, *c;
  FLOAT alpha[] = {1.0, 1.0};
  FLOAT beta [] = {1.0, 1.0};
  char *p;

  char side='L';
  char uplo='U';

  blasint m, i, j;

  int from = 1;
  int to = 200;
  int step = 1;

  struct timeval start, stop;
  double time1;

  if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  /* A zero or negative step would never move m past `to`. */
  if (step < 1) step = 1;

  fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c\n", from, to, step,side,uplo);

  /* All three buffers are sized once for the largest order. */
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef linux
  /* The matrices are filled with rand(), whose seeding function is srand();
     srandom() only affects rand() on C libraries where both share state. */
  srand(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    fprintf(stderr, " %6d : ", (int)m);

    /* Fresh operands in [-0.5, 0.5] for every size. */
    for(j = 0; j < m; j++){
      for(i = 0; i < m * COMPSIZE; i++){
        a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }

    /* Only the BLAS call itself is inside the timed region. */
    gettimeofday( &start, (struct timezone *)0);

    SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );

    gettimeofday( &stop, (struct timezone *)0);

    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

    fprintf(stderr,
            " %10.2f MFlops\n",
            COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);

  }

  return 0;
}

/* Expose MAIN__ as a weak `main`. Declared with an int return type so the
   alias has the same type as MAIN__. */
int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 203
- 0
benchmark/syr2k.c View File

@@ -0,0 +1,203 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"


/*
 * Bind SYR2K to the BLAS entry point matching the build flags:
 *   COMPLEX and DOUBLE -> zsyr2k
 *   COMPLEX only       -> csyr2k
 *   DOUBLE only        -> dsyr2k
 *   neither            -> ssyr2k
 * Any earlier definition of SYR2K is discarded first.
 */
#undef SYR2K

#if defined(COMPLEX) && defined(DOUBLE)
#define SYR2K BLASFUNC(zsyr2k)
#elif defined(COMPLEX)
#define SYR2K BLASFUNC(csyr2k)
#elif defined(DOUBLE)
#define SYR2K BLASFUNC(dsyr2k)
#else
#define SYR2K BLASFUNC(ssyr2k)
#endif

#if defined(__WIN32__) || defined(__WIN64__)

/* Offset subtracted below to rebase a FILETIME value (already converted to
   microseconds) onto the Unix epoch. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for Windows builds.
 *
 * tv : receives the current system time as seconds and microseconds relative
 *      to the Unix epoch; nothing is written when tv is NULL.
 * tz : accepted for signature compatibility only and ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz;   /* time-zone information is not provided */

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Reassemble the 64-bit tick count from its two 32-bit halves. */
  tmpres   = ft.dwHighDateTime;
  tmpres <<= 32;
  tmpres  |= ft.dwLowDateTime;

  /* converting file time to unix epoch */
  tmpres /= 10;                         /* convert into microseconds */
  tmpres -= DELTA_EPOCH_IN_MICROSECS;

  tv->tv_sec  = (long)(tmpres / 1000000UL);
  tv->tv_usec = (long)(tmpres % 1000000UL);

  return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocate memory from a System V shared-memory segment created with
 * SHM_HUGETLB. The whole section is compiled out by the trailing "&& 0" in
 * the condition above; when enabled, the #define at the bottom routes every
 * malloc() call that follows in this file through this function.
 *
 * size : number of bytes requested. The segment size is
 *        (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), i.e. rounded to a
 *        multiple of HUGE_PAGESIZE (assuming it is a power of two).
 *
 * Returns the attached address. Prints a message and exits with status 1 if
 * the segment cannot be created or attached.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() reports failure as (void *)-1 */
  if (address == (void *)-1){
    /* Remove the segment before exiting: System V segments are not released
       automatically when the creating process terminates. */
    shmctl(shmid, IPC_RMID, 0);
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal right away; it is destroyed once the last
     attachment goes away. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif

/*
 * Benchmark driver: times SYR2K on square m x m operands and reports MFlops.
 *
 * Positional arguments (all optional): from to step
 *   from : first matrix order (default 1)
 *   to   : last matrix order (default 200); when given it is raised to
 *          `from` if smaller
 *   step : increment between orders (default 1); values below 1 are
 *          treated as 1 because they would never end the size loop
 * Environment:
 *   OPENBLAS_UPLO  : first character is passed as the uplo argument (default 'U')
 *   OPENBLAS_TRANS : first character is passed as the trans argument (default 'N')
 *
 * Output goes to stderr, one line per size. Returns 0; exits with status 1
 * when a buffer cannot be allocated.
 */
int MAIN__(int argc, char *argv[]){

  FLOAT *a, *b, *c;
  FLOAT alpha[] = {1.0, 1.0};
  FLOAT beta [] = {1.0, 1.0};
  char *p;

  char uplo='U';
  char trans='N';

  blasint m, i, j;

  int from = 1;
  int to = 200;
  int step = 1;

  struct timeval start, stop;
  double time1;

  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
  if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  /* A zero or negative step would never move m past `to`. */
  if (step < 1) step = 1;

  fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);

  /* All three buffers are sized once for the largest order. */
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef linux
  /* The matrices are filled with rand(), whose seeding function is srand();
     srandom() only affects rand() on C libraries where both share state. */
  srand(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    fprintf(stderr, " %6d : ", (int)m);

    /* Fresh operands in [-0.5, 0.5] for every size. */
    for(j = 0; j < m; j++){
      for(i = 0; i < m * COMPSIZE; i++){
        a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }

    /* Only the BLAS call itself is inside the timed region. */
    gettimeofday( &start, (struct timezone *)0);

    SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );

    gettimeofday( &stop, (struct timezone *)0);

    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

    fprintf(stderr,
            " %10.2f MFlops\n",
            COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);

  }

  return 0;
}

/* Expose MAIN__ as a weak `main`. Declared with an int return type so the
   alias has the same type as MAIN__. */
int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 199
- 0
benchmark/syrk.c View File

@@ -0,0 +1,199 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"


/*
 * Select the BLAS routine that the SYRK macro expands to.
 * The choice is made from two build flags:
 *   COMPLEX undefined, DOUBLE defined   -> dsyrk
 *   COMPLEX undefined, DOUBLE undefined -> ssyrk
 *   COMPLEX defined,   DOUBLE defined   -> zsyrk
 *   COMPLEX defined,   DOUBLE undefined -> csyrk
 * Each name is wrapped in BLASFUNC, which is defined outside this file.
 */
#undef SYRK

#ifndef COMPLEX

/* real-valued variants */
#ifdef DOUBLE
#define SYRK BLASFUNC(dsyrk)
#else
#define SYRK BLASFUNC(ssyrk)
#endif

#else

/* complex-valued variants */
#ifdef DOUBLE
#define SYRK BLASFUNC(zsyrk)
#else
#define SYRK BLASFUNC(csyrk)
#endif

#endif

#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted to rebase a FILETIME value onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for native Windows builds.
 *
 * tv : receives the current system time as seconds / microseconds relative
 *      to the Unix epoch; when NULL nothing is written.
 * tz : accepted for signature compatibility only; it is never read.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz;	/* time-zone information is not provided by this shim */

  if (NULL == tv) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Join the high and low 32-bit halves into one 64-bit tick count. */
  tmpres = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  /*converting file time to unix epoch*/
  tmpres /= 10;  /*convert into microseconds*/
  tmpres -= DELTA_EPOCH_IN_MICROSECS;

  tv->tv_sec  = (long)(tmpres / 1000000UL);
  tv->tv_usec = (long)(tmpres % 1000000UL);

  return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocate a buffer from a private SysV shared-memory segment requested
 * with SHM_HUGETLB, and return the address it is attached at.
 *
 * NOTE: the trailing "&& 0" in the guard above compiles this section out;
 * when enabled, the #define at the bottom redirects malloc() to it.
 *
 * size : number of bytes requested.  The segment size passed to shmget() is
 *        (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1), i.e. a
 *        multiple of HUGE_PAGESIZE provided that constant is a power of two
 *        (assumption - HUGE_PAGESIZE is defined outside this file).
 *
 * On any failure a message is printed and the process exits with status 1.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  if ((shmid =shmget(IPC_PRIVATE,
		     (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		     SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  if (address == (void *)-1){
    /* Remove the segment before bailing out so a failed attach does not
       leave it allocated after the process has gone. */
    shmctl(shmid, IPC_RMID, 0);
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal right away; the attached mapping is
     still returned to the caller. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif

/*
 * SYRK benchmark driver.
 *
 * Command line:  [from [to [step]]]   (defaults: 1, 200, 1)
 * Environment:   OPENBLAS_UPLO  - first character becomes the uplo argument  (default 'U')
 *                OPENBLAS_TRANS - first character becomes the trans argument (default 'N')
 *
 * Two to-by-to matrices are allocated once.  For every size m in
 * from, from+step, ... <= to they are refilled with random values in
 * [-0.5, 0.5], one SYRK call is timed with gettimeofday(), and the rate
 * is written to stderr.
 *
 * Returns 0 on success; exits with status 1 on a bad step or when an
 * allocation fails.  The matrices are not freed explicitly: malloc may be
 * remapped to huge_malloc in this file, so they are left to process exit.
 */
int MAIN__(int argc, char *argv[]){

  FLOAT *a, *c;
  FLOAT alpha[] = {1.0, 1.0};
  FLOAT beta [] = {1.0, 1.0};
  char *p;

  char uplo='U';
  char trans='N';

  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
  if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;

  blasint m, i, j;

  int from =   1;
  int to   = 200;
  int step =   1;

  struct timeval start, stop;
  double time1;

  argc--;argv++;

  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}

  /* With step <= 0 the size loop below would never get past `to`. */
  if (step <= 0) {
    fprintf(stderr, "Step must be a positive integer.\n");
    exit(1);
  }

  fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);

  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  /* NOTE(review): the fill loop draws from rand(); whether srandom() also
     seeds rand() depends on the C library - confirm. */
#ifdef linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    fprintf(stderr, " %6d : ", (int)m);

    /* Fresh operands for every size; each column holds m * COMPSIZE values. */
    for(j = 0; j < m; j++){
      for(i = 0; i < m * COMPSIZE; i++){
	a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
	c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }

    /* Only the BLAS call itself sits between the two timestamps. */
    gettimeofday( &start, (struct timezone *)0);

    SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );

    gettimeofday( &stop, (struct timezone *)0);

    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

    fprintf(stderr,
	    " %10.2f MFlops\n",
	    COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);

  }

  return 0;
}

void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 202
- 0
benchmark/trmm.c View File

@@ -0,0 +1,202 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"


/*
 * Select the BLAS routine that the TRMM macro expands to.
 * The choice is made from two build flags:
 *   COMPLEX undefined, DOUBLE defined   -> dtrmm
 *   COMPLEX undefined, DOUBLE undefined -> strmm
 *   COMPLEX defined,   DOUBLE defined   -> ztrmm
 *   COMPLEX defined,   DOUBLE undefined -> ctrmm
 * Each name is wrapped in BLASFUNC, which is defined outside this file.
 */
#undef TRMM

#ifndef COMPLEX

/* real-valued variants */
#ifdef DOUBLE
#define TRMM BLASFUNC(dtrmm)
#else
#define TRMM BLASFUNC(strmm)
#endif

#else

/* complex-valued variants */
#ifdef DOUBLE
#define TRMM BLASFUNC(ztrmm)
#else
#define TRMM BLASFUNC(ctrmm)
#endif

#endif

#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted to rebase a FILETIME value onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for native Windows builds.
 *
 * tv : receives the current system time as seconds / microseconds relative
 *      to the Unix epoch; when NULL nothing is written.
 * tz : accepted for signature compatibility only; it is never read.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz;	/* time-zone information is not provided by this shim */

  if (NULL == tv) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Join the high and low 32-bit halves into one 64-bit tick count. */
  tmpres = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  /*converting file time to unix epoch*/
  tmpres /= 10;  /*convert into microseconds*/
  tmpres -= DELTA_EPOCH_IN_MICROSECS;

  tv->tv_sec  = (long)(tmpres / 1000000UL);
  tv->tv_usec = (long)(tmpres % 1000000UL);

  return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocate a buffer from a private SysV shared-memory segment requested
 * with SHM_HUGETLB, and return the address it is attached at.
 *
 * NOTE: the trailing "&& 0" in the guard above compiles this section out;
 * when enabled, the #define at the bottom redirects malloc() to it.
 *
 * size : number of bytes requested.  The segment size passed to shmget() is
 *        (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1), i.e. a
 *        multiple of HUGE_PAGESIZE provided that constant is a power of two
 *        (assumption - HUGE_PAGESIZE is defined outside this file).
 *
 * On any failure a message is printed and the process exits with status 1.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  if ((shmid =shmget(IPC_PRIVATE,
		     (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		     SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  if (address == (void *)-1){
    /* Remove the segment before bailing out so a failed attach does not
       leave it allocated after the process has gone. */
    shmctl(shmid, IPC_RMID, 0);
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal right away; the attached mapping is
     still returned to the caller. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif

/*
 * TRMM benchmark driver.
 *
 * Command line:  [from [to [step]]]   (defaults: 1, 200, 1)
 * Environment:   OPENBLAS_SIDE  - first character becomes the side argument  (default 'L')
 *                OPENBLAS_UPLO  - first character becomes the uplo argument  (default 'U')
 *                OPENBLAS_TRANS - first character becomes the trans argument (default 'N')
 *                OPENBLAS_DIAG  - first character becomes the diag argument  (default 'U')
 *
 * Two to-by-to matrices are allocated once.  For every size m in
 * from, from+step, ... <= to they are refilled with random values in
 * [-0.5, 0.5], one TRMM call is timed with gettimeofday(), and the rate
 * is written to stderr.
 *
 * Returns 0 on success; exits with status 1 on a bad step or when an
 * allocation fails.  The matrices are not freed explicitly: malloc may be
 * remapped to huge_malloc in this file, so they are left to process exit.
 */
int MAIN__(int argc, char *argv[]){

  FLOAT *a, *b;
  FLOAT alpha[] = {1.0, 1.0};
  char *p;

  char side ='L';
  char uplo ='U';
  char trans='N';
  char diag ='U';

  if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
  if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
  if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;

  blasint m, i, j;

  int from =   1;
  int to   = 200;
  int step =   1;

  struct timeval start, stop;
  double time1;

  argc--;argv++;

  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}

  /* With step <= 0 the size loop below would never get past `to`. */
  if (step <= 0) {
    fprintf(stderr, "Step must be a positive integer.\n");
    exit(1);
  }

  fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag);

  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  /* NOTE(review): the fill loop draws from rand(); whether srandom() also
     seeds rand() depends on the C library - confirm. */
#ifdef linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    fprintf(stderr, " %6d : ", (int)m);

    /* Fresh operands for every size; each column holds m * COMPSIZE values. */
    for(j = 0; j < m; j++){
      for(i = 0; i < m * COMPSIZE; i++){
	a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
	b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }

    /* Only the BLAS call itself sits between the two timestamps. */
    gettimeofday( &start, (struct timezone *)0);

    TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);

    gettimeofday( &stop, (struct timezone *)0);

    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

    fprintf(stderr,
	    " %10.2f MFlops\n",
	    COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);

  }

  return 0;
}

void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 202
- 0
benchmark/trsm.c View File

@@ -0,0 +1,202 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"


/*
 * Select the BLAS routine that the TRSM macro expands to.
 * The choice is made from two build flags:
 *   COMPLEX undefined, DOUBLE defined   -> dtrsm
 *   COMPLEX undefined, DOUBLE undefined -> strsm
 *   COMPLEX defined,   DOUBLE defined   -> ztrsm
 *   COMPLEX defined,   DOUBLE undefined -> ctrsm
 * Each name is wrapped in BLASFUNC, which is defined outside this file.
 */
#undef TRSM

#ifndef COMPLEX

/* real-valued variants */
#ifdef DOUBLE
#define TRSM BLASFUNC(dtrsm)
#else
#define TRSM BLASFUNC(strsm)
#endif

#else

/* complex-valued variants */
#ifdef DOUBLE
#define TRSM BLASFUNC(ztrsm)
#else
#define TRSM BLASFUNC(ctrsm)
#endif

#endif

#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset, in microseconds, subtracted to rebase a FILETIME value onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() replacement for native Windows builds.
 *
 * tv : receives the current system time as seconds / microseconds relative
 *      to the Unix epoch; when NULL nothing is written.
 * tz : accepted for signature compatibility only; it is never read.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz;	/* time-zone information is not provided by this shim */

  if (NULL == tv) return 0;

  GetSystemTimeAsFileTime(&ft);

  /* Join the high and low 32-bit halves into one 64-bit tick count. */
  tmpres = ((unsigned __int64)ft.dwHighDateTime << 32) | ft.dwLowDateTime;

  /*converting file time to unix epoch*/
  tmpres /= 10;  /*convert into microseconds*/
  tmpres -= DELTA_EPOCH_IN_MICROSECS;

  tv->tv_sec  = (long)(tmpres / 1000000UL);
  tv->tv_usec = (long)(tmpres % 1000000UL);

  return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/*
 * Allocate a buffer from a private SysV shared-memory segment requested
 * with SHM_HUGETLB, and return the address it is attached at.
 *
 * NOTE: the trailing "&& 0" in the guard above compiles this section out;
 * when enabled, the #define at the bottom redirects malloc() to it.
 *
 * size : number of bytes requested.  The segment size passed to shmget() is
 *        (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1), i.e. a
 *        multiple of HUGE_PAGESIZE provided that constant is a power of two
 *        (assumption - HUGE_PAGESIZE is defined outside this file).
 *
 * On any failure a message is printed and the process exits with status 1.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  if ((shmid =shmget(IPC_PRIVATE,
		     (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		     SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  if (address == (void *)-1){
    /* Remove the segment before bailing out so a failed attach does not
       leave it allocated after the process has gone. */
    shmctl(shmid, IPC_RMID, 0);
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal right away; the attached mapping is
     still returned to the caller. */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif

/*
 * TRSM benchmark driver.
 *
 * Command line:  [from [to [step]]]   (defaults: 1, 200, 1)
 * Environment:   OPENBLAS_SIDE  - first character becomes the side argument  (default 'L')
 *                OPENBLAS_UPLO  - first character becomes the uplo argument  (default 'U')
 *                OPENBLAS_TRANS - first character becomes the trans argument (default 'N')
 *                OPENBLAS_DIAG  - first character becomes the diag argument  (default 'U')
 *
 * Two to-by-to matrices are allocated once.  For every size m in
 * from, from+step, ... <= to they are refilled with random values in
 * [-0.5, 0.5], one TRSM call is timed with gettimeofday(), and the rate
 * is written to stderr.
 *
 * Returns 0 on success; exits with status 1 on a bad step or when an
 * allocation fails.  The matrices are not freed explicitly: malloc may be
 * remapped to huge_malloc in this file, so they are left to process exit.
 */
int MAIN__(int argc, char *argv[]){

  FLOAT *a, *b;
  FLOAT alpha[] = {1.0, 1.0};
  char *p;

  char side ='L';
  char uplo ='U';
  char trans='N';
  char diag ='U';

  if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
  if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
  if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;

  blasint m, i, j;

  int from =   1;
  int to   = 200;
  int step =   1;

  struct timeval start, stop;
  double time1;

  argc--;argv++;

  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}

  /* With step <= 0 the size loop below would never get past `to`. */
  if (step <= 0) {
    fprintf(stderr, "Step must be a positive integer.\n");
    exit(1);
  }

  fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag);

  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  /* NOTE(review): the fill loop draws from rand(); whether srandom() also
     seeds rand() depends on the C library - confirm. */
#ifdef linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    fprintf(stderr, " %6d : ", (int)m);

    /* Fresh operands for every size; each column holds m * COMPSIZE values. */
    for(j = 0; j < m; j++){
      for(i = 0; i < m * COMPSIZE; i++){
	a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
	b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }

    /* Only the BLAS call itself sits between the two timestamps. */
    gettimeofday( &start, (struct timezone *)0);

    TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);

    gettimeofday( &stop, (struct timezone *)0);

    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

    fprintf(stderr,
	    " %10.2f MFlops\n",
	    COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);

  }

  return 0;
}

void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 16
- 0
cpuid_x86.c View File

@@ -1062,7 +1062,11 @@ int get_cpuname(void){
case 12:
case 15:
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
}
@@ -1072,7 +1076,11 @@ int get_cpuname(void){
case 5:
case 6:
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
}
@@ -1471,7 +1479,11 @@ int get_coretype(void){
case 12:
case 15:
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
}
@@ -1481,7 +1493,11 @@ int get_coretype(void){
case 5:
case 6:
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
}


+ 4
- 0
driver/others/dynamic.c View File

@@ -66,7 +66,11 @@ extern gotoblas_t gotoblas_BOBCAT;
extern gotoblas_t gotoblas_SANDYBRIDGE;
extern gotoblas_t gotoblas_BULLDOZER;
extern gotoblas_t gotoblas_PILEDRIVER;
#ifdef NO_AVX2
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
#else
extern gotoblas_t gotoblas_HASWELL;
#endif
#else
//Use NEHALEM kernels for sandy bridge
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM


+ 19
- 18
interface/Makefile View File

@@ -356,25 +356,25 @@ ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)

#SLAPACKOBJS = \
# sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
# spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
# slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \
# sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
# spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \
# slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX)

SLAPACKOBJS = \
sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \
slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX)
slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX)


#DLAPACKOBJS = \
# dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
# dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
# dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \
# dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \
# dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \
# dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX)

DLAPACKOBJS = \
dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \
dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \
dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX)
dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX)


QLAPACKOBJS = \
@@ -382,28 +382,29 @@ QLAPACKOBJS = \
qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \
qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \


#CLAPACKOBJS = \
# cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
# cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
# claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \
# cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
# cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
# clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX)


CLAPACKOBJS = \
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX)
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX)


#ZLAPACKOBJS = \
# zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
# zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
# zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \
# zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
# zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \
# zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX)


ZLAPACKOBJS = \
zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \
zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX)


zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX)


XLAPACKOBJS = \


+ 4
- 2
kernel/arm/KERNEL.ARMV6 View File

@@ -1,5 +1,7 @@
SGEMVNKERNEL = ../arm/gemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c
CGEMVNKERNEL = ../arm/zgemv_n.c
CGEMVTKERNEL = ../arm/zgemv_t.c

DGEMVNKERNEL = ../arm/gemv_n.c
DGEMVTKERNEL = ../arm/gemv_t.c
@@ -96,12 +98,12 @@ ZSWAPKERNEL = swap_vfp.S

# BAD SGEMVNKERNEL = gemv_n_vfp.S
# BAD DGEMVNKERNEL = gemv_n_vfp.S
CGEMVNKERNEL = cgemv_n_vfp.S
# CGEMVNKERNEL = cgemv_n_vfp.S
ZGEMVNKERNEL = zgemv_n_vfp.S

# BAD SGEMVTKERNEL = gemv_t_vfp.S
# BAD DGEMVTKERNEL = gemv_t_vfp.S
CGEMVTKERNEL = cgemv_t_vfp.S
# CGEMVTKERNEL = cgemv_t_vfp.S
ZGEMVTKERNEL = zgemv_t_vfp.S

STRMMKERNEL = strmm_kernel_4x2_vfp.S


+ 4
- 2
kernel/arm/KERNEL.ARMV7 View File

@@ -1,5 +1,7 @@
SGEMVNKERNEL = ../arm/gemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c
CGEMVNKERNEL = ../arm/zgemv_n.c
CGEMVTKERNEL = ../arm/zgemv_t.c


#################################################################################
@@ -77,12 +79,12 @@ ZSCALKERNEL = zscal.c

# BAD SGEMVNKERNEL = gemv_n_vfp.S
DGEMVNKERNEL = gemv_n_vfp.S
CGEMVNKERNEL = cgemv_n_vfp.S
#CGEMVNKERNEL = cgemv_n_vfp.S
ZGEMVNKERNEL = zgemv_n_vfp.S

# BAD SGEMVTKERNEL = gemv_t_vfp.S
DGEMVTKERNEL = gemv_t_vfp.S
CGEMVTKERNEL = cgemv_t_vfp.S
#CGEMVTKERNEL = cgemv_t_vfp.S
ZGEMVTKERNEL = zgemv_t_vfp.S

STRMMKERNEL = strmm_kernel_4x4_vfpv3.S


+ 35
- 275
kernel/x86/KERNEL View File

@@ -1,263 +1,3 @@
GEMVDEP = ../l2param.h

ifdef HAVE_SSE

ifndef SAMAXKERNEL
SAMAXKERNEL = amax_sse.S
endif

ifndef CAMAXKERNEL
CAMAXKERNEL = zamax_sse.S
endif

ifndef SAMINKERNEL
SAMINKERNEL = amax_sse.S
endif

ifndef CAMINKERNEL
CAMINKERNEL = zamax_sse.S
endif

ifndef ISAMAXKERNEL
ISAMAXKERNEL = iamax_sse.S
endif

ifndef ICAMAXKERNEL
ICAMAXKERNEL = izamax_sse.S
endif

ifndef ISAMINKERNEL
ISAMINKERNEL = iamax_sse.S
endif

ifndef ICAMINKERNEL
ICAMINKERNEL = izamax_sse.S
endif

ifndef ISMAXKERNEL
ISMAXKERNEL = iamax_sse.S
endif

ifndef ISMINKERNEL
ISMINKERNEL = iamax_sse.S
endif

ifndef SMAXKERNEL
SMAXKERNEL = amax_sse.S
endif

ifndef SMINKERNEL
SMINKERNEL = amax_sse.S
endif

ifndef SASUMKERNEL
SASUMKERNEL = asum_sse.S
endif

ifndef CASUMKERNEL
CASUMKERNEL = zasum_sse.S
endif

ifndef SDOTKERNEL
SDOTKERNEL = ../arm/dot.c
endif

ifndef CDOTKERNEL
CDOTKERNEL = zdot_sse.S
endif

ifndef SCOPYKERNEL
SCOPYKERNEL = copy_sse.S
endif

ifndef CCOPYKERNEL
CCOPYKERNEL = zcopy_sse.S
endif

ifndef SSACALKERNEL
SSCALKERNEL = scal_sse.S
endif

ifndef CSACALKERNEL
CSCALKERNEL = zscal_sse.S
endif

ifndef SAXPYKERNEL
SAXPYKERNEL = axpy_sse.S
endif

ifndef CAXPYKERNEL
CAXPYKERNEL = zaxpy_sse.S
endif

ifndef SROTKERNEL
SROTKERNEL = rot_sse.S
endif

ifndef CROTKERNEL
CROTKERNEL = zrot_sse.S
endif

ifndef SSWAPKERNEL
SSWAPKERNEL = swap_sse.S
endif

ifndef CSWAPKERNEL
CSWAPKERNEL = zswap_sse.S
endif

ifndef SGEMVNKERNEL
SGEMVNKERNEL = ../arm/gemv_n.c
endif

ifndef SGEMVTKERNEL
SGEMVTKERNEL = ../arm/gemv_t.c
endif

ifndef CGEMVNKERNEL
CGEMVNKERNEL = zgemv_n_sse.S
endif

ifndef CGEMVTKERNEL
CGEMVTKERNEL = zgemv_t_sse.S
endif

endif


ifdef HAVE_SSE2

ifndef DAMAXKERNEL
DAMAXKERNEL = amax_sse2.S
endif

ifndef ZAMAXKERNEL
ZAMAXKERNEL = zamax_sse2.S
endif

ifndef DAMINKERNEL
DAMINKERNEL = amax_sse2.S
endif

ifndef ZAMINKERNEL
ZAMINKERNEL = zamax_sse2.S
endif

ifndef IDAMAXKERNEL
IDAMAXKERNEL = iamax_sse2.S
endif

ifndef IZAMAXKERNEL
IZAMAXKERNEL = izamax_sse2.S
endif

ifndef IDAMINKERNEL
IDAMINKERNEL = iamax_sse2.S
endif

ifndef IZAMINKERNEL
IZAMINKERNEL = izamax_sse2.S
endif

ifndef IDMAXKERNEL
IDMAXKERNEL = iamax_sse2.S
endif

ifndef IDMINKERNEL
IDMINKERNEL = iamax_sse2.S
endif

ifndef DMAXKERNEL
DMAXKERNEL = amax_sse2.S
endif

ifndef DMINKERNEL
DMINKERNEL = amax_sse2.S
endif

ifndef DDOTKERNEL
DDOTKERNEL = dot_sse2.S
endif

ifndef ZDOTKERNEL
ZDOTKERNEL = zdot_sse2.S
endif

ifndef DCOPYKERNEL
# DCOPYKERNEL = copy_sse2.S
endif

ifndef ZCOPYKERNEL
ZCOPYKERNEL = zcopy_sse2.S
endif

ifndef DSACALKERNEL
DSCALKERNEL = scal_sse2.S
endif

ifndef ZSACALKERNEL
ZSCALKERNEL = zscal_sse2.S
endif

ifndef DASUMKERNEL
DASUMKERNEL = asum_sse2.S
endif

ifndef ZASUMKERNEL
ZASUMKERNEL = zasum_sse2.S
endif

ifndef DAXPYKERNEL
DAXPYKERNEL = axpy_sse2.S
endif

ifndef ZAXPYKERNEL
ZAXPYKERNEL = zaxpy_sse2.S
endif

ifndef SNRM2KERNEL
SNRM2KERNEL = nrm2_sse.S
endif

ifndef CNRM2KERNEL
CNRM2KERNEL = znrm2_sse.S
endif

ifndef DROTKERNEL
DROTKERNEL = rot_sse2.S
endif

ifndef ZROTKERNEL
ZROTKERNEL = zrot_sse2.S
endif

ifndef DSWAPKERNEL
DSWAPKERNEL = swap_sse2.S
endif

ifndef ZSWAPKERNEL
ZSWAPKERNEL = zswap_sse2.S
endif

ifndef DGEMVNKERNEL
DGEMVNKERNEL = gemv_n_sse2.S
endif

ifndef DGEMVTKERNEL
DGEMVTKERNEL = gemv_t_sse2.S
endif

ifndef ZGEMVNKERNEL
ZGEMVNKERNEL = zgemv_n_sse2.S
endif

ifndef ZGEMVTKERNEL
ZGEMVTKERNEL = zgemv_t_sse2.S
endif

endif


ifndef SAMINKERNEL
SAMINKERNEL = amax.S
endif
@@ -394,21 +134,41 @@ XGEMMITCOPYOBJ =
XGEMMONCOPYOBJ = xgemm_oncopy$(TSUFFIX).$(SUFFIX)
XGEMMOTCOPYOBJ = xgemm_otcopy$(TSUFFIX).$(SUFFIX)

SGEMM_BETA = gemm_beta.S
DGEMM_BETA = gemm_beta.S
QGEMM_BETA = ../generic/gemm_beta.c
CGEMM_BETA = zgemm_beta.S
ZGEMM_BETA = zgemm_beta.S
XGEMM_BETA = ../generic/zgemm_beta.c

QTRSMKERNEL_LN = qtrsm_kernel_LN_2x2.S
QTRSMKERNEL_LT = qtrsm_kernel_LT_2x2.S
QTRSMKERNEL_RN = qtrsm_kernel_LT_2x2.S
QTRSMKERNEL_RT = qtrsm_kernel_RT_2x2.S
QTRSMKERNEL_LN = qtrsm_kernel_LN_2x2.S
QTRSMKERNEL_LT = qtrsm_kernel_LT_2x2.S
QTRSMKERNEL_RN = qtrsm_kernel_LT_2x2.S
QTRSMKERNEL_RT = qtrsm_kernel_RT_2x2.S

XTRSMKERNEL_LN = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_LT = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_LN = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_LT = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S

XGEMM3MKERNEL = xgemm3m_kernel_2x2.S


# bug in zdot assembler kernel
ifndef ZDOTKERNEL
ZDOTKERNEL = ../arm/zdot.c
endif

DSDOTKERNEL = ../arm/dot.c

# Bug in znrm2 assembler kernel
ifndef ZNRM2KERNEL
ZNRM2KERNEL = ../arm/znrm2.c
endif

# Bug in zgemv_t assembler kernel
ifndef ZGEMVTKERNEL
ZGEMVTKERNEL = ../arm/zgemv_t.c
endif

SGEMM_BETA = ../generic/gemm_beta.c
DGEMM_BETA = ../generic/gemm_beta.c
CGEMM_BETA = ../generic/zgemm_beta.c
ZGEMM_BETA = ../generic/zgemm_beta.c

QGEMM_BETA = ../generic/gemm_beta.c
XGEMM_BETA = ../generic/zgemm_beta.c


+ 0
- 3
kernel/x86_64/KERNEL.BARCELONA View File

@@ -1,6 +1,3 @@
SGEMVNKERNEL = sgemv_n.S
SGEMVTKERNEL = sgemv_t.S

ZGEMVNKERNEL = zgemv_n_dup.S
ZGEMVTKERNEL = zgemv_t.S



+ 0
- 2
kernel/x86_64/KERNEL.BULLDOZER View File

@@ -1,5 +1,3 @@
SGEMVNKERNEL = sgemv_n.S
SGEMVTKERNEL = sgemv_t.S

ZGEMVNKERNEL = zgemv_n_dup.S
ZGEMVTKERNEL = zgemv_t.S


+ 0
- 3
kernel/x86_64/KERNEL.HASWELL View File

@@ -1,6 +1,3 @@
SGEMVNKERNEL = sgemv_n.S
SGEMVTKERNEL = sgemv_t.S


SGEMMKERNEL = sgemm_kernel_16x4_haswell.S
SGEMMINCOPY = ../generic/gemm_ncopy_16.c


+ 0
- 3
kernel/x86_64/KERNEL.NEHALEM View File

@@ -1,6 +1,3 @@
SGEMVNKERNEL = sgemv_n.S
SGEMVTKERNEL = sgemv_t.S


SGEMMKERNEL = gemm_kernel_4x8_nehalem.S
SGEMMINCOPY = gemm_ncopy_4.S


+ 0
- 2
kernel/x86_64/KERNEL.PILEDRIVER View File

@@ -1,5 +1,3 @@
SGEMVNKERNEL = sgemv_n.S
SGEMVTKERNEL = sgemv_t.S

ZGEMVNKERNEL = zgemv_n_dup.S
ZGEMVTKERNEL = zgemv_t.S


+ 0
- 2
kernel/x86_64/KERNEL.SANDYBRIDGE View File

@@ -1,5 +1,3 @@
SGEMVNKERNEL = sgemv_n.S
SGEMVTKERNEL = sgemv_t.S

SGEMMKERNEL = sgemm_kernel_16x4_sandy.S
SGEMMINCOPY = ../generic/gemm_ncopy_16.c


+ 4
- 4
lapack-netlib/SRC/Makefile View File

@@ -155,7 +155,7 @@ SLASRC = \
sbbcsd.o slapmr.o sorbdb.o sorbdb1.o sorbdb2.o sorbdb3.o sorbdb4.o \
sorbdb5.o sorbdb6.o sorcsd.o sorcsd2by1.o \
sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \
stpqrt.o stpqrt2.o stpmqrt.o stprfb.o
stpqrt.o stpqrt2.o stpmqrt.o stprfb.o spotri.o

DSLASRC = spotrs.o

@@ -236,7 +236,7 @@ CLASRC = \
cbbcsd.o clapmr.o cunbdb.o cunbdb1.o cunbdb2.o cunbdb3.o cunbdb4.o \
cunbdb5.o cunbdb6.o cuncsd.o cuncsd2by1.o \
cgeqrt.o cgeqrt2.o cgeqrt3.o cgemqrt.o \
ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o
ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o cpotri.o

ifdef USEXBLAS
CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
@@ -316,7 +316,7 @@ DLASRC = \
dbbcsd.o dlapmr.o dorbdb.o dorbdb1.o dorbdb2.o dorbdb3.o dorbdb4.o \
dorbdb5.o dorbdb6.o dorcsd.o dorcsd2by1.o \
dgeqrt.o dgeqrt2.o dgeqrt3.o dgemqrt.o \
dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o
dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o dpotri.o

ifdef USEXBLAS
DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \
@@ -400,7 +400,7 @@ ZLASRC = \
zbbcsd.o zlapmr.o zunbdb.o zunbdb1.o zunbdb2.o zunbdb3.o zunbdb4.o \
zunbdb5.o zunbdb6.o zuncsd.o zuncsd2by1.o \
zgeqrt.o zgeqrt2.o zgeqrt3.o zgemqrt.o \
ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o
ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o zpotri.o

ifdef USEXBLAS
ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \


+ 49
- 48
lapack-netlib/SRC/cgesvd.f View File

@@ -321,24 +321,24 @@
*
MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 )
* Compute space needed for CGEQRF
CALL CGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_CGEQRF=DUM(1)
CALL CGEQRF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_CGEQRF=CDUM(1)
* Compute space needed for CUNGQR
CALL CUNGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_CUNGQR_N=DUM(1)
CALL CUNGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_CUNGQR_M=DUM(1)
CALL CUNGQR( M, N, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_CUNGQR_N=CDUM(1)
CALL CUNGQR( M, M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_CUNGQR_M=CDUM(1)
* Compute space needed for CGEBRD
CALL CGEBRD( N, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_CGEBRD=DUM(1)
CALL CGEBRD( N, N, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_CGEBRD=CDUM(1)
* Compute space needed for CUNGBR
CALL CUNGBR( 'P', N, N, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_P=DUM(1)
CALL CUNGBR( 'Q', N, N, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_Q=DUM(1)
CALL CUNGBR( 'P', N, N, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_P=CDUM(1)
CALL CUNGBR( 'Q', N, N, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_Q=CDUM(1)
*
MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 )
IF( M.GE.MNTHR ) THEN
@@ -444,20 +444,20 @@
*
* Path 10 (M at least N, but not much larger)
*
CALL CGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_CGEBRD=DUM(1)
CALL CGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_CGEBRD=CDUM(1)
MAXWRK = 2*N + LWORK_CGEBRD
IF( WNTUS .OR. WNTUO ) THEN
CALL CUNGBR( 'Q', M, N, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_Q=DUM(1)
CALL CUNGBR( 'Q', M, N, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_Q=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_Q )
END IF
IF( WNTUA ) THEN
CALL CUNGBR( 'Q', M, M, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_Q=DUM(1)
CALL CUNGBR( 'Q', M, M, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_Q=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_Q )
END IF
IF( .NOT.WNTVN ) THEN
@@ -471,25 +471,26 @@
*
MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 )
* Compute space needed for CGELQF
CALL CGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_CGELQF=DUM(1)
CALL CGELQF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_CGELQF=CDUM(1)
* Compute space needed for CUNGLQ
CALL CUNGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
LWORK_CUNGLQ_N=DUM(1)
CALL CUNGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_CUNGLQ_M=DUM(1)
CALL CUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), -1,
$ IERR )
LWORK_CUNGLQ_N=CDUM(1)
CALL CUNGLQ( M, N, M, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_CUNGLQ_M=CDUM(1)
* Compute space needed for CGEBRD
CALL CGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_CGEBRD=DUM(1)
CALL CGEBRD( M, M, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_CGEBRD=CDUM(1)
* Compute space needed for CUNGBR P
CALL CUNGBR( 'P', M, M, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_P=DUM(1)
CALL CUNGBR( 'P', M, M, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_P=CDUM(1)
* Compute space needed for CUNGBR Q
CALL CUNGBR( 'Q', M, M, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_Q=DUM(1)
CALL CUNGBR( 'Q', M, M, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_Q=CDUM(1)
IF( N.GE.MNTHR ) THEN
IF( WNTVN ) THEN
*
@@ -593,21 +594,21 @@
*
* Path 10t(N greater than M, but not much larger)
*
CALL CGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_CGEBRD=DUM(1)
CALL CGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_CGEBRD=CDUM(1)
MAXWRK = 2*M + LWORK_CGEBRD
IF( WNTVS .OR. WNTVO ) THEN
* Compute space needed for CUNGBR P
CALL CUNGBR( 'P', M, N, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_P=DUM(1)
CALL CUNGBR( 'P', M, N, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_P=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_P )
END IF
IF( WNTVA ) THEN
CALL CUNGBR( 'P', N, N, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_CUNGBR_P=DUM(1)
CALL CUNGBR( 'P', N, N, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_CUNGBR_P=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_P )
END IF
IF( .NOT.WNTUN ) THEN


+ 1
- 1
lapack-netlib/SRC/clanhf.f View File

@@ -286,7 +286,7 @@
CLANHF = ZERO
RETURN
ELSE IF( N.EQ.1 ) THEN
CLANHF = ABS( A(0) )
CLANHF = ABS(REAL(A(0)))
RETURN
END IF
*


+ 49
- 48
lapack-netlib/SRC/zgesvd.f View File

@@ -321,24 +321,24 @@
*
MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 )
* Compute space needed for ZGEQRF
CALL ZGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_ZGEQRF=DUM(1)
CALL ZGEQRF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_ZGEQRF=CDUM(1)
* Compute space needed for ZUNGQR
CALL ZUNGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_ZUNGQR_N=DUM(1)
CALL ZUNGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_ZUNGQR_M=DUM(1)
CALL ZUNGQR( M, N, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_ZUNGQR_N=CDUM(1)
CALL ZUNGQR( M, M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_ZUNGQR_M=CDUM(1)
* Compute space needed for ZGEBRD
CALL ZGEBRD( N, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_ZGEBRD=DUM(1)
CALL ZGEBRD( N, N, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_ZGEBRD=CDUM(1)
* Compute space needed for ZUNGBR
CALL ZUNGBR( 'P', N, N, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_P=DUM(1)
CALL ZUNGBR( 'Q', N, N, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_Q=DUM(1)
CALL ZUNGBR( 'P', N, N, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_P=CDUM(1)
CALL ZUNGBR( 'Q', N, N, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_Q=CDUM(1)
*
IF( M.GE.MNTHR ) THEN
IF( WNTUN ) THEN
@@ -443,20 +443,20 @@
*
* Path 10 (M at least N, but not much larger)
*
CALL ZGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_ZGEBRD=DUM(1)
CALL ZGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_ZGEBRD=CDUM(1)
MAXWRK = 2*N + LWORK_ZGEBRD
IF( WNTUS .OR. WNTUO ) THEN
CALL ZUNGBR( 'Q', M, N, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_Q=DUM(1)
CALL ZUNGBR( 'Q', M, N, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_Q=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q )
END IF
IF( WNTUA ) THEN
CALL ZUNGBR( 'Q', M, M, N, A, LDA, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_Q=DUM(1)
CALL ZUNGBR( 'Q', M, M, N, A, LDA, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_Q=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q )
END IF
IF( .NOT.WNTVN ) THEN
@@ -470,25 +470,26 @@
*
MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 )
* Compute space needed for ZGELQF
CALL ZGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_ZGELQF=DUM(1)
CALL ZGELQF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_ZGELQF=CDUM(1)
* Compute space needed for ZUNGLQ
CALL ZUNGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
LWORK_ZUNGLQ_N=DUM(1)
CALL ZUNGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR )
LWORK_ZUNGLQ_M=DUM(1)
CALL ZUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), -1,
$ IERR )
LWORK_ZUNGLQ_N=CDUM(1)
CALL ZUNGLQ( M, N, M, A, LDA, CDUM(1), CDUM(1), -1, IERR )
LWORK_ZUNGLQ_M=CDUM(1)
* Compute space needed for ZGEBRD
CALL ZGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_ZGEBRD=DUM(1)
CALL ZGEBRD( M, M, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_ZGEBRD=CDUM(1)
* Compute space needed for ZUNGBR P
CALL ZUNGBR( 'P', M, M, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_P=DUM(1)
CALL ZUNGBR( 'P', M, M, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_P=CDUM(1)
* Compute space needed for ZUNGBR Q
CALL ZUNGBR( 'Q', M, M, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_Q=DUM(1)
CALL ZUNGBR( 'Q', M, M, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_Q=CDUM(1)
IF( N.GE.MNTHR ) THEN
IF( WNTVN ) THEN
*
@@ -592,21 +593,21 @@
*
* Path 10t(N greater than M, but not much larger)
*
CALL ZGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
$ DUM(1), DUM(1), -1, IERR )
LWORK_ZGEBRD=DUM(1)
CALL ZGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1),
$ CDUM(1), CDUM(1), -1, IERR )
LWORK_ZGEBRD=CDUM(1)
MAXWRK = 2*M + LWORK_ZGEBRD
IF( WNTVS .OR. WNTVO ) THEN
* Compute space needed for ZUNGBR P
CALL ZUNGBR( 'P', M, N, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_P=DUM(1)
CALL ZUNGBR( 'P', M, N, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_P=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P )
END IF
IF( WNTVA ) THEN
CALL ZUNGBR( 'P', N, N, M, A, N, DUM(1),
$ DUM(1), -1, IERR )
LWORK_ZUNGBR_P=DUM(1)
CALL ZUNGBR( 'P', N, N, M, A, N, CDUM(1),
$ CDUM(1), -1, IERR )
LWORK_ZUNGBR_P=CDUM(1)
MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P )
END IF
IF( .NOT.WNTUN ) THEN


+ 1
- 1
lapack-netlib/SRC/zlanhf.f View File

@@ -286,7 +286,7 @@
ZLANHF = ZERO
RETURN
ELSE IF( N.EQ.1 ) THEN
ZLANHF = ABS( A(0) )
ZLANHF = ABS(DBLE(A(0)))
RETURN
END IF
*


+ 4
- 4
lapack-netlib/SRC/zstemr.f View File

@@ -526,10 +526,10 @@
IF (SN.NE.ZERO) THEN
IF (CS.NE.ZERO) THEN
ISUPPZ(2*M-1) = 1
ISUPPZ(2*M-1) = 2
ISUPPZ(2*M) = 2
ELSE
ISUPPZ(2*M-1) = 1
ISUPPZ(2*M-1) = 1
ISUPPZ(2*M) = 1
END IF
ELSE
ISUPPZ(2*M-1) = 2
@@ -550,10 +550,10 @@
IF (SN.NE.ZERO) THEN
IF (CS.NE.ZERO) THEN
ISUPPZ(2*M-1) = 1
ISUPPZ(2*M-1) = 2
ISUPPZ(2*M) = 2
ELSE
ISUPPZ(2*M-1) = 1
ISUPPZ(2*M-1) = 1
ISUPPZ(2*M) = 1
END IF
ELSE
ISUPPZ(2*M-1) = 2


+ 3
- 3
lapack-netlib/TESTING/dstest.in View File

@@ -1,8 +1,8 @@
Data file for testing DSGESV/DSPOSV LAPACK routines
11 Number of values of M
0 1 2 13 17 45 78 91 101 120 132 Values of M (row dimension)
12 Number of values of M
0 1 2 13 17 45 78 91 101 119 120 132 values of M (row dimension)
4 Number of values of NRHS
1 2 15 16 Values of NRHS (number of right hand sides)
1 2 14 16 Values of NRHS (number of right hand sides)
30.0 Threshold value of test ratio
T Put T to test the driver routine
T Put T to test the error exits


Loading…
Cancel
Save