Browse Source

Merge branch 'develop' of https://github.com/wernsaar/OpenBLAS into wernsaar-develop

Conflicts:
	kernel/arm/KERNEL.ARMV6
tags/v0.2.9^2
Zhang Xianyi 11 years ago
parent
commit
406f5bd22b
50 changed files with 429 additions and 167 deletions
  1. +4
    -5
      Makefile
  2. +6
    -0
      Makefile.arm
  3. +5
    -2
      Makefile.rule
  4. +1
    -13
      Makefile.system
  5. +8
    -1
      common.h
  6. +1
    -1
      driver/others/Makefile
  7. +16
    -0
      getarch.c
  8. +65
    -44
      interface/Makefile
  9. +0
    -0
      interface/lapack/gesv.c
  10. +0
    -0
      interface/lapack/getf2.c
  11. +0
    -0
      interface/lapack/getrf.c
  12. +0
    -0
      interface/lapack/getrs.c
  13. +0
    -0
      interface/lapack/larf.c.obsolete
  14. +0
    -0
      interface/lapack/laswp.c
  15. +0
    -0
      interface/lapack/lauu2.c.bad
  16. +0
    -0
      interface/lapack/lauum.c.bad
  17. +0
    -0
      interface/lapack/potf2.c
  18. +0
    -0
      interface/lapack/potrf.c
  19. +0
    -0
      interface/lapack/potri.c.bad
  20. +0
    -0
      interface/lapack/trti2.c.bad
  21. +0
    -0
      interface/lapack/trtri.c.bad
  22. +0
    -0
      interface/lapack/zgetf2.c
  23. +0
    -0
      interface/lapack/zgetrf.c
  24. +0
    -0
      interface/lapack/zgetrs.c
  25. +0
    -0
      interface/lapack/zlaswp.c
  26. +0
    -0
      interface/lapack/zlauu2.c.bad
  27. +0
    -0
      interface/lapack/zlauum.c.bad
  28. +0
    -0
      interface/lapack/zpotf2.c
  29. +0
    -0
      interface/lapack/zpotrf.c
  30. +0
    -0
      interface/lapack/zpotri.c.bad
  31. +0
    -0
      interface/lapack/ztrti2.c.bad
  32. +0
    -0
      interface/lapack/ztrtri.c.bad
  33. +5
    -5
      interface/sbmv.c
  34. +5
    -5
      interface/spmv.c
  35. +12
    -3
      interface/syr2k.c
  36. +12
    -3
      interface/syrk.c
  37. +5
    -5
      interface/zhbmv.c
  38. +4
    -4
      interface/zsbmv.c
  39. +4
    -4
      interface/zspmv.c
  40. +134
    -0
      kernel/arm/KERNEL.ARMV5
  41. +12
    -3
      kernel/arm/KERNEL.ARMV6
  42. +16
    -11
      kernel/x86_64/KERNEL.NEHALEM
  43. +16
    -14
      kernel/x86_64/KERNEL.SANDYBRIDGE
  44. +19
    -0
      lapack-devel.log
  45. +28
    -34
      lapack-netlib/SRC/Makefile
  46. +1
    -1
      lapack-netlib/TESTING/ctest_rfp.in
  47. +1
    -1
      lapack-netlib/TESTING/svd.in
  48. +2
    -1
      lapack/Makefile
  49. +3
    -3
      make.inc
  50. +44
    -4
      param.h

+ 4
- 5
Makefile View File

@@ -262,11 +262,10 @@ endif


lapack-test :
$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintsts xlintstz xlintstzc
@rm -f $(NETLIB_LAPACK_DIR)/TESTING/*.out
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING
$(GREP) failed $(NETLIB_LAPACK_DIR)/TESTING/*.out
make -j 1 -C $(NETLIB_LAPACK_DIR) tmglib
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )


dummy :



+ 6
- 0
Makefile.arm View File

@@ -10,3 +10,9 @@ FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
endif


# When CORE is ARMV5, append the same ARM-mode / VFP / hard-float flags to
# both the C (CCOMMON_OPT) and Fortran (FCOMMON_OPT) common options.
# NOTE(review): -march=armv6 is passed although CORE is ARMV5 -- confirm
# that this is intentional and not copied from the ARMV6 block.
ifeq ($(CORE), ARMV5)
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
endif



+ 5
- 2
Makefile.rule View File

@@ -76,10 +76,10 @@ VERSION = 0.2.9.rc2
# Unfortunately, most kernels won't give us a high-quality buffer.
# BLAS tries to find the best region before entering the main function,
# but this takes time. If you don't like it, you can disable it.
# NO_WARMUP = 1
NO_WARMUP = 1

# If you want to disable CPU/Memory affinity on Linux.
# NO_AFFINITY = 1
NO_AFFINITY = 1

# Don't use the AVX kernel on Sandy Bridge. This keeps compatibility with old
# compilers and operating systems, but the performance is lower.
@@ -129,6 +129,9 @@ VERSION = 0.2.9.rc2
# The default -O2 is enough.
# COMMON_OPT = -O2

# gfortran option for LAPACK
FCOMMON_OPT = -frecursive

# Profiling flags
COMMON_PROF = -pg



+ 1
- 13
Makefile.system View File

@@ -158,6 +158,7 @@ endif

ifeq ($(OSNAME), Linux)
EXTRALIB += -lm
NO_EXPRECISION = 1
endif

ifeq ($(OSNAME), AIX)
@@ -846,19 +847,6 @@ ifeq ($(DEBUG), 1)
COMMON_OPT += -g
endif

ifndef COMMON_OPT
ifeq ($(ARCH), arm)
COMMON_OPT = -O3
endif
endif

ifndef COMMON_OPT
ifeq ($(ARCH), arm64)
COMMON_OPT = -O3
endif
endif


ifndef COMMON_OPT
COMMON_OPT = -O2
endif


+ 8
- 1
common.h View File

@@ -310,10 +310,17 @@ typedef int blasint;
#define YIELDING SwitchToThread()
#endif

#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8)
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
#endif

#ifdef BULLDOZER
#ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
#endif
#endif


#ifdef PILEDRIVER
#ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");


+ 1
- 1
driver/others/Makefile View File

@@ -3,7 +3,7 @@ include ../../Makefile.system

COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX) openblas_error_handle.$(SUFFIX)

COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
#COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)

ifdef SMP
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)


+ 16
- 0
getarch.c View File

@@ -724,6 +724,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#endif

/*
 * Forced-target description used when FORCE_ARMV5 is defined.
 * Sets FORCE, the architecture / sub-architecture / directory / library /
 * core name strings, and the -D flag string ARCHCONFIG (cache and DTB
 * parameters plus -DHAVE_VFP).
 * NOTE(review): L2_SIZE=512488 is not a power of two (512 KiB would be
 * 524288) -- confirm whether this value is intentional.
 */
#ifdef FORCE_ARMV5
#define FORCE
#define ARCHITECTURE "ARM"
#define SUBARCHITECTURE "ARMV5"
#define SUBDIRNAME "arm"
#define ARCHCONFIG "-DARMV5 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
"-DHAVE_VFP"
#define LIBNAME "armv5"
#define CORENAME "ARMV5"
#else
#endif


#ifdef FORCE_ARMV8
#define FORCE
#define ARCHITECTURE "ARM64"


+ 65
- 44
interface/Makefile View File

@@ -2,11 +2,11 @@ TOPDIR = ..
include $(TOPDIR)/Makefile.system

ifeq ($(ARCH), x86)
SUPPORT_GEMM3M = 1
SUPPORT_GEMM3M = 0
endif

ifeq ($(ARCH), x86_64)
SUPPORT_GEMM3M = 1
SUPPORT_GEMM3M = 0
endif

ifeq ($(ARCH), ia64)
@@ -342,30 +342,51 @@ CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)

#SLAPACKOBJS = \
# sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
# spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
# slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \

SLAPACKOBJS = \
sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \
sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX)


#DLAPACKOBJS = \
# dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
# dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
# dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \

DLAPACKOBJS = \
dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \
dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \
dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX)

QLAPACKOBJS = \
qgetf2.$(SUFFIX) qgetrf.$(SUFFIX) qlauu2.$(SUFFIX) qlauum.$(SUFFIX) \
qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \
qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \

#CLAPACKOBJS = \
# cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
# cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
# claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \

CLAPACKOBJS = \
cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX)


#ZLAPACKOBJS = \
# zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
# zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
# zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \

ZLAPACKOBJS = \
zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \
zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX)



XLAPACKOBJS = \
xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \
@@ -375,10 +396,10 @@ XLAPACKOBJS = \
ifneq ($(NO_LAPACK), 1)
SBLASOBJS += $(SLAPACKOBJS)
DBLASOBJS += $(DLAPACKOBJS)
QBLASOBJS += $(QLAPACKOBJS)
#QBLASOBJS += $(QLAPACKOBJS)
CBLASOBJS += $(CLAPACKOBJS)
ZBLASOBJS += $(ZLAPACKOBJS)
XBLASOBJS += $(XLAPACKOBJS)
#XBLASOBJS += $(XLAPACKOBJS)

endif

@@ -1731,37 +1752,37 @@ cblas_cher2k.$(SUFFIX) cblas_cher2k.$(PSUFFIX) : syr2k.c
cblas_zher2k.$(SUFFIX) cblas_zher2k.$(PSUFFIX) : syr2k.c
$(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F)

sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : getf2.c
sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : lapack/getf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)

dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : getf2.c
dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : lapack/getf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)

qgetf2.$(SUFFIX) qgetf2.$(PSUFFIX) : getf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)

cgetf2.$(SUFFIX) cgetf2.$(PSUFFIX) : zgetf2.c
cgetf2.$(SUFFIX) cgetf2.$(PSUFFIX) : lapack/zgetf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)

zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : zgetf2.c
zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : lapack/zgetf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)

xgetf2.$(SUFFIX) xgetf2.$(PSUFFIX) : zgetf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)

sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : getrf.c
sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : lapack/getrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)

dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : getrf.c
dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : lapack/getrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)

qgetrf.$(SUFFIX) qgetrf.$(PSUFFIX) : getrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)

cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : zgetrf.c
cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : lapack/zgetrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)

zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : zgetrf.c
zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : lapack/zgetrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)

xgetrf.$(SUFFIX) xgetrf.$(PSUFFIX) : zgetrf.c
@@ -1803,37 +1824,37 @@ zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : zlauum.c
xlauum.$(SUFFIX) xlauum.$(PSUFFIX) : zlauum.c
$(CC) -c $(CFLAGS) $< -o $(@F)

spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : potf2.c
spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : lapack/potf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)

dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : potf2.c
dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : lapack/potf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)

qpotf2.$(SUFFIX) qpotf2.$(PSUFFIX) : potf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)

cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : zpotf2.c
cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : lapack/zpotf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)

zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : zpotf2.c
zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : lapack/zpotf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)

xpotf2.$(SUFFIX) xpotf2.$(PSUFFIX) : zpotf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)

spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : potrf.c
spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : lapack/potrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)

dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : potrf.c
dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : lapack/potrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)

qpotrf.$(SUFFIX) qpotrf.$(PSUFFIX) : potrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)

cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : zpotrf.c
cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : lapack/zpotrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)

zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : zpotrf.c
zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : lapack/zpotrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)

xpotrf.$(SUFFIX) xpotrf.$(PSUFFIX) : zpotrf.c
@@ -1875,55 +1896,55 @@ ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : ztrtri.c
xtrtri.$(SUFFIX) xtrtri.$(PSUFFIX) : ztrtri.c
$(CC) -c $(CFLAGS) $< -o $(@F)

slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : laswp.c
slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : lapack/laswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)

dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : laswp.c
dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : lapack/laswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)

qlaswp.$(SUFFIX) qlaswp.$(PSUFFIX) : laswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)

claswp.$(SUFFIX) claswp.$(PSUFFIX) : zlaswp.c
claswp.$(SUFFIX) claswp.$(PSUFFIX) : lapack/zlaswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)

zlaswp.$(SUFFIX) zlaswp.$(PSUFFIX) : zlaswp.c
zlaswp.$(SUFFIX) zlaswp.$(PSUFFIX) : lapack/zlaswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)

xlaswp.$(SUFFIX) xlaswp.$(PSUFFIX) : zlaswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)

sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : getrs.c
sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : lapack/getrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)

dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : getrs.c
dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : lapack/getrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)

qgetrs.$(SUFFIX) qgetrs.$(PSUFFIX) : getrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)

cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : zgetrs.c
cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : lapack/zgetrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)

zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : zgetrs.c
zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : lapack/zgetrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)

xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : zgetrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)

sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : gesv.c
sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : lapack/gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)

dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : gesv.c
dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : lapack/gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)

qgesv.$(SUFFIX) qgesv.$(PSUFFIX) : gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)

cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : gesv.c
cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : lapack/gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)

zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : gesv.c
zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : lapack/gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)

xgesv.$(SUFFIX) xgesv.$(PSUFFIX) : gesv.c


interface/gesv.c → interface/lapack/gesv.c View File


interface/getf2.c → interface/lapack/getf2.c View File


interface/getrf.c → interface/lapack/getrf.c View File


interface/getrs.c → interface/lapack/getrs.c View File


interface/larf.c → interface/lapack/larf.c.obsolete View File


interface/laswp.c → interface/lapack/laswp.c View File


interface/lauu2.c → interface/lapack/lauu2.c.bad View File


interface/lauum.c → interface/lapack/lauum.c.bad View File


interface/potf2.c → interface/lapack/potf2.c View File


interface/potrf.c → interface/lapack/potrf.c View File


interface/potri.c → interface/lapack/potri.c.bad View File


interface/trti2.c → interface/lapack/trti2.c.bad View File


interface/trtri.c → interface/lapack/trtri.c.bad View File


interface/zgetf2.c → interface/lapack/zgetf2.c View File


interface/zgetrf.c → interface/lapack/zgetrf.c View File


interface/zgetrs.c → interface/lapack/zgetrs.c View File


interface/zlaswp.c → interface/lapack/zlaswp.c View File


interface/zlauu2.c → interface/lapack/zlauu2.c.bad View File


interface/zlauum.c → interface/lapack/zlauum.c.bad View File


interface/zpotf2.c → interface/lapack/zpotf2.c View File


interface/zpotrf.c → interface/lapack/zpotrf.c View File


interface/zpotri.c → interface/lapack/zpotri.c.bad View File


interface/ztrti2.c → interface/lapack/ztrti2.c.bad View File


interface/ztrtri.c → interface/lapack/ztrtri.c.bad View File


+ 5
- 5
interface/sbmv.c View File

@@ -61,7 +61,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA
#endif
};

#ifdef SMP
#ifdef SMPBUG
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
qsbmv_thread_U, qsbmv_thread_L,
@@ -90,7 +90,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif

@@ -130,7 +130,7 @@ void CNAME(enum CBLAS_ORDER order,
FLOAT *buffer;
int uplo;
blasint info;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif

@@ -189,7 +189,7 @@ void CNAME(enum CBLAS_ORDER order,

buffer = (FLOAT *)blas_memory_alloc(1);

#ifdef SMP
#ifdef SMPBUG
nthreads = num_cpu_avail(2);

if (nthreads == 1) {
@@ -197,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,

(sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer);

#ifdef SMP
#ifdef SMPBUG
} else {

(sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads);


+ 5
- 5
interface/spmv.c View File

@@ -61,7 +61,7 @@ static int (*spmv[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLAS
#endif
};

#ifdef SMP
#ifdef SMPTEST
static int (*spmv_thread[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
qspmv_thread_U, qspmv_thread_L,
@@ -88,7 +88,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPTEST
int nthreads;
#endif

@@ -126,7 +126,7 @@ void CNAME(enum CBLAS_ORDER order,
FLOAT *buffer;
int uplo;
blasint info;
#ifdef SMP
#ifdef SMPTEST
int nthreads;
#endif

@@ -181,7 +181,7 @@ void CNAME(enum CBLAS_ORDER order,

buffer = (FLOAT *)blas_memory_alloc(1);

#ifdef SMP
#ifdef SMPTEST
nthreads = num_cpu_avail(2);

if (nthreads == 1) {
@@ -189,7 +189,7 @@ void CNAME(enum CBLAS_ORDER order,

(spmv[uplo])(n, alpha, a, x, incx, y, incy, buffer);

#ifdef SMP
#ifdef SMPTEST
} else {

(spmv_thread[uplo])(n, alpha, a, x, incx, y, incy, buffer, nthreads);


+ 12
- 3
interface/syr2k.c View File

@@ -145,12 +145,21 @@ void NAME(char *UPLO, char *TRANS,
if (uplo_arg == 'U') uplo = 0;
if (uplo_arg == 'L') uplo = 1;

#ifndef COMPLEX
if (trans_arg == 'N') trans = 0;
#ifndef HEMM
if (trans_arg == 'T') trans = 1;
if (trans_arg == 'R') trans = 0;
#endif
if (trans_arg == 'C') trans = 1;
#else
#ifdef HEMM
if (trans_arg == 'N') trans = 0;
if (trans_arg == 'C') trans = 1;
#else
if (trans_arg == 'N') trans = 0;
if (trans_arg == 'T') trans = 1;
#endif

#endif

nrowa = args.n;
if (trans & 1) nrowa = args.k;


+ 12
- 3
interface/syrk.c View File

@@ -148,12 +148,21 @@ void NAME(char *UPLO, char *TRANS,
if (uplo_arg == 'U') uplo = 0;
if (uplo_arg == 'L') uplo = 1;


#ifndef COMPLEX
if (trans_arg == 'N') trans = 0;
#ifndef HEMM
if (trans_arg == 'T') trans = 1;
if (trans_arg == 'R') trans = 0;
#endif
if (trans_arg == 'C') trans = 1;
#else
#ifdef HEMM
if (trans_arg == 'N') trans = 0;
if (trans_arg == 'C') trans = 1;
#else
if (trans_arg == 'N') trans = 0;
if (trans_arg == 'T') trans = 1;
#endif

#endif
nrowa = args.n;
if (trans & 1) nrowa = args.k;


+ 5
- 5
interface/zhbmv.c View File

@@ -61,7 +61,7 @@ static int (*hbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
#endif
};

#ifdef SMP
#ifdef SMPBUG
static int (*hbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
xhbmv_thread_U, xhbmv_thread_L, xhbmv_thread_V, xhbmv_thread_M,
@@ -92,7 +92,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif

@@ -138,7 +138,7 @@ void CNAME(enum CBLAS_ORDER order,
FLOAT *buffer;
int uplo;
blasint info;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif

@@ -197,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,

buffer = (FLOAT *)blas_memory_alloc(1);

#ifdef SMP
#ifdef SMPBUG
nthreads = num_cpu_avail(2);

if (nthreads == 1) {
@@ -205,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order,

(hbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, x, incx, y, incy, buffer);

#ifdef SMP
#ifdef SMPBUG
} else {

(hbmv_thread[uplo])(n, k, ALPHA, a, lda, x, incx, y, incy, buffer, nthreads);


+ 4
- 4
interface/zsbmv.c View File

@@ -61,7 +61,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
#endif
};

#ifdef SMP
#ifdef SMPBUG
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
xsbmv_thread_U, xsbmv_thread_L,
@@ -90,7 +90,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif

@@ -131,7 +131,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *

buffer = (FLOAT *)blas_memory_alloc(1);

#ifdef SMP
#ifdef SMPBUG
nthreads = num_cpu_avail(2);

if (nthreads == 1) {
@@ -139,7 +139,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *

(sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer);

#ifdef SMP
#ifdef SMPBUG
} else {

(sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads);


+ 4
- 4
interface/zspmv.c View File

@@ -61,7 +61,7 @@ static int (*spmv[])(BLASLONG, FLOAT, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT
#endif
};

#ifdef SMP
#ifdef SMPTEST
static int (*spmv_thread[])(BLASLONG, FLOAT *, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
xspmv_thread_U, xspmv_thread_L,
@@ -88,7 +88,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPTEST
int nthreads;
#endif

@@ -127,7 +127,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,

buffer = (FLOAT *)blas_memory_alloc(1);

#ifdef SMP
#ifdef SMPTEST
nthreads = num_cpu_avail(2);

if (nthreads == 1) {
@@ -135,7 +135,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,

(spmv[uplo])(n, alpha_r, alpha_i, a, b, incx, c, incy, buffer);

#ifdef SMP
#ifdef SMPTEST

} else {



+ 134
- 0
kernel/arm/KERNEL.ARMV5 View File

@@ -0,0 +1,134 @@
# Kernel source selection for the ARMV5 core.
# Every variable below points at a C source under ../arm or ../generic;
# no assembly (.S) kernel is referenced in this file.
# Naming: the S/D variants share one source and the C/Z variants share the
# corresponding z-prefixed source.

# Absolute-value max / min
SAMAXKERNEL = ../arm/amax.c
DAMAXKERNEL = ../arm/amax.c
CAMAXKERNEL = ../arm/zamax.c
ZAMAXKERNEL = ../arm/zamax.c

SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c

# Max / min (S and D only)
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c

SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c

# Index-returning variants of the kernels above
ISAMAXKERNEL = ../arm/iamax.c
IDAMAXKERNEL = ../arm/iamax.c
ICAMAXKERNEL = ../arm/izamax.c
IZAMAXKERNEL = ../arm/izamax.c

ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c

ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c

ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c

# Vector kernels: asum, axpy, copy, dot, nrm2, rot, scal, swap
SASUMKERNEL = ../arm/asum.c
DASUMKERNEL = ../arm/asum.c
CASUMKERNEL = ../arm/zasum.c
ZASUMKERNEL = ../arm/zasum.c

SAXPYKERNEL = ../arm/axpy.c
DAXPYKERNEL = ../arm/axpy.c
CAXPYKERNEL = ../arm/zaxpy.c
ZAXPYKERNEL = ../arm/zaxpy.c

SCOPYKERNEL = ../arm/copy.c
DCOPYKERNEL = ../arm/copy.c
CCOPYKERNEL = ../arm/zcopy.c
ZCOPYKERNEL = ../arm/zcopy.c

SDOTKERNEL = ../arm/dot.c
DDOTKERNEL = ../arm/dot.c
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c

SNRM2KERNEL = ../arm/nrm2.c
DNRM2KERNEL = ../arm/nrm2.c
CNRM2KERNEL = ../arm/znrm2.c
ZNRM2KERNEL = ../arm/znrm2.c

SROTKERNEL = ../arm/rot.c
DROTKERNEL = ../arm/rot.c
CROTKERNEL = ../arm/zrot.c
ZROTKERNEL = ../arm/zrot.c

SSCALKERNEL = ../arm/scal.c
DSCALKERNEL = ../arm/scal.c
CSCALKERNEL = ../arm/zscal.c
ZSCALKERNEL = ../arm/zscal.c

SSWAPKERNEL = ../arm/swap.c
DSWAPKERNEL = ../arm/swap.c
CSWAPKERNEL = ../arm/zswap.c
ZSWAPKERNEL = ../arm/zswap.c

# GEMV kernels (N and T variants)
SGEMVNKERNEL = ../arm/gemv_n.c
DGEMVNKERNEL = ../arm/gemv_n.c
CGEMVNKERNEL = ../arm/zgemv_n.c
ZGEMVNKERNEL = ../arm/zgemv_n.c

SGEMVTKERNEL = ../arm/gemv_t.c
DGEMVTKERNEL = ../arm/gemv_t.c
CGEMVTKERNEL = ../arm/zgemv_t.c
ZGEMVTKERNEL = ../arm/zgemv_t.c

# TRMM kernels: generic 2x2 C sources
STRMMKERNEL = ../generic/trmmkernel_2x2.c
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c

# GEMM kernels and their ncopy_2 / tcopy_2 copy routines (generic C sources).
# The *COPYOBJ names are written with a literal .o suffix.
# NOTE(review): other KERNEL files build these object names from
# $(TSUFFIX).$(SUFFIX) -- confirm the literal .o is intended here.
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o

DGEMMKERNEL = ../generic/gemmkernel_2x2.c
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o

CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o

ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o

# TRSM kernels: S, D, C and Z all point at the same four generic
# trsm_kernel_{LN,LT,RN,RT}.c sources.
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c





+ 12
- 3
kernel/arm/KERNEL.ARMV6 View File

@@ -1,11 +1,20 @@
SGEMVNKERNEL = ../arm/gemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c

DGEMVNKERNEL = ../arm/gemv_n.c
DGEMVTKERNEL = ../arm/gemv_t.c

CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c

#ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
#ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
#ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
#ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c


#STRMMKERNEL = ../generic/trmmkernel_2x2.c
#SGEMMKERNEL = ../generic/gemmkernel_2x2.c
#SGEMMONCOPY = ../generic/gemm_ncopy_2.c
@@ -86,18 +95,18 @@ CSWAPKERNEL = swap_vfp.S
ZSWAPKERNEL = swap_vfp.S

# BAD SGEMVNKERNEL = gemv_n_vfp.S
DGEMVNKERNEL = gemv_n_vfp.S
# BAD DGEMVNKERNEL = gemv_n_vfp.S
CGEMVNKERNEL = cgemv_n_vfp.S
ZGEMVNKERNEL = zgemv_n_vfp.S

# BAD SGEMVTKERNEL = gemv_t_vfp.S
DGEMVTKERNEL = gemv_t_vfp.S
# BAD DGEMVTKERNEL = gemv_t_vfp.S
CGEMVTKERNEL = cgemv_t_vfp.S
ZGEMVTKERNEL = zgemv_t_vfp.S

STRMMKERNEL = strmm_kernel_4x2_vfp.S
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
# CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
#CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S

SGEMMKERNEL = sgemm_kernel_4x2_vfp.S


+ 16
- 11
kernel/x86_64/KERNEL.NEHALEM View File

@@ -7,15 +7,19 @@ SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = gemm_kernel_2x8_nehalem.S
DGEMMINCOPY = dgemm_ncopy_2.S
DGEMMITCOPY = dgemm_tcopy_2.S
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
DGEMMOTCOPY = dgemm_tcopy_8.S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)


DGEMMKERNEL = gemm_kernel_4x4_core2.S
DGEMMINCOPY =
DGEMMITCOPY =
DGEMMONCOPY = gemm_ncopy_4.S
DGEMMOTCOPY = gemm_tcopy_4.S
DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)


CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
CGEMMINCOPY = zgemm_ncopy_2.S
CGEMMITCOPY = zgemm_tcopy_2.S
@@ -40,10 +44,11 @@ STRSMKERNEL_LT = trsm_kernel_LT_4x8_nehalem.S
STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S
STRSMKERNEL_RT = trsm_kernel_RT_4x8_nehalem.S

DTRSMKERNEL_LN = trsm_kernel_LN_2x8_nehalem.S
DTRSMKERNEL_LT = trsm_kernel_LT_2x8_nehalem.S
DTRSMKERNEL_RN = trsm_kernel_LT_2x8_nehalem.S
DTRSMKERNEL_RT = trsm_kernel_RT_2x8_nehalem.S
DTRSMKERNEL_LN = trsm_kernel_LN_4x4_core2.S
DTRSMKERNEL_LT = trsm_kernel_LT_4x4_core2.S
DTRSMKERNEL_RN = trsm_kernel_LT_4x4_core2.S
DTRSMKERNEL_RT = trsm_kernel_RT_4x4_core2.S


CTRSMKERNEL_LN = ztrsm_kernel_LN_2x4_nehalem.S
CTRSMKERNEL_LT = ztrsm_kernel_LT_2x4_nehalem.S


+ 16
- 14
kernel/x86_64/KERNEL.SANDYBRIDGE View File

@@ -1,34 +1,35 @@
SGEMMKERNEL = sgemm_kernel_8x8_sandy.S
SGEMMINCOPY =
SGEMMITCOPY =
SGEMMKERNEL = gemm_kernel_4x8_nehalem.S
SGEMMINCOPY = gemm_ncopy_4.S
SGEMMITCOPY = gemm_tcopy_4.S
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
SGEMMINCOPYOBJ =
SGEMMITCOPYOBJ =
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)


DGEMMKERNEL = dgemm_kernel_4x8_sandy.S
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
#DGEMMONCOPY = gemm_ncopy_4.S
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
#DGEMMOTCOPY = gemm_tcopy_4.S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
#CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
CGEMMKERNEL = cgemm_kernel_4x8_sandy.S
CGEMMINCOPY = ../generic/zgemm_ncopy_8_sandy.c
CGEMMITCOPY = ../generic/zgemm_tcopy_8_sandy.c
CGEMMONCOPY = ../generic/zgemm_ncopy_4_sandy.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4_sandy.c
CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
CGEMMINCOPY = zgemm_ncopy_2.S
CGEMMITCOPY = zgemm_tcopy_2.S
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
#ZGEMMKERNEL = zgemm_kernel_1x4_nehalem.S


ZGEMMKERNEL = zgemm_kernel_4x4_sandy.S
ZGEMMINCOPY =
ZGEMMITCOPY =
@@ -58,6 +59,7 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
#ZTRSMKERNEL_LT = ztrsm_kernel_LT_1x4_nehalem.S
#ZTRSMKERNEL_RN = ztrsm_kernel_LT_1x4_nehalem.S
#ZTRSMKERNEL_RT = ztrsm_kernel_RT_1x4_nehalem.S

STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c


+ 19
- 0
lapack-devel.log View File

@@ -0,0 +1,19 @@
========================================================================================
2014/05/07 Saar

Platform: BULLDOZER single thread


--> LAPACK TESTING SUMMARY <--
Processing LAPACK Testing output found in the TESTING directory
SUMMARY nb test run numerical error other error
================ =========== ================= ================
REAL 1079349 0 (0.000%) 0 (0.000%)
DOUBLE PRECISION 1080161 0 (0.000%) 0 (0.000%)
COMPLEX 556022 0 (0.000%) 0 (0.000%)
COMPLEX16 556834 0 (0.000%) 0 (0.000%)

--> ALL PRECISIONS 3272366 0 (0.000%) 0 (0.000%)

========================================================================================


+ 28
- 34
lapack-netlib/SRC/Makefile View File

@@ -54,9 +54,9 @@ include ../make.inc
#
#######################################################################

ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla_array.o iparmq.o \
ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla.o xerbla_array.o iparmq.o \
ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \
../INSTALL/ilaver.o
../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o

SCLAUX = \
sbdsdc.o \
@@ -92,7 +92,7 @@ DZLAUX = \
dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \
dsteqr.o dsterf.o dlaisnan.o disnan.o \
dlartgp.o dlartgs.o \
../INSTALL/dsecnd_$(TIMER).o
../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o

SLASRC = \
sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \
@@ -101,7 +101,7 @@ SLASRC = \
sgegs.o sgegv.o sgehd2.o sgehrd.o sgelq2.o sgelqf.o \
sgels.o sgelsd.o sgelss.o sgelsx.o sgelsy.o sgeql2.o sgeqlf.o \
sgeqp3.o sgeqpf.o sgeqr2.o sgeqr2p.o sgeqrf.o sgeqrfp.o sgerfs.o \
sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesv.o sgesvd.o sgesvx.o \
sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesvd.o sgesvx.o \
sgetc2.o sgetri.o \
sggbak.o sggbal.o sgges.o sggesx.o sggev.o sggevx.o \
sggglm.o sgghrd.o sgglse.o sggqrf.o \
@@ -120,7 +120,7 @@ SLASRC = \
slarrv.o slartv.o \
slarz.o slarzb.o slarzt.o slasy2.o slasyf.o slasyf_rook.o \
slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o slatzm.o \
sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \
sorgrq.o sorgtr.o sorm2l.o sorm2r.o \
sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \
@@ -147,7 +147,7 @@ SLASRC = \
stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \
stptrs.o \
strcon.o strevc.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \
strtrs.o stzrqf.o stzrzf.o sstemr.o \
strti2.o strtri.o strtrs.o stzrqf.o stzrzf.o sstemr.o \
slansf.o spftrf.o spftri.o spftrs.o ssfrk.o stfsm.o stftri.o stfttp.o \
stfttr.o stpttf.o stpttr.o strttf.o strttp.o \
sgejsv.o sgesvj.o sgsvj0.o sgsvj1.o \
@@ -157,7 +157,7 @@ SLASRC = \
sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \
stpqrt.o stpqrt2.o stpmqrt.o stprfb.o

DSLASRC = spotrs.o
DSLASRC = spotrs.o

ifdef USEXBLAS
SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \
@@ -176,7 +176,7 @@ CLASRC = \
cgegs.o cgegv.o cgehd2.o cgehrd.o cgelq2.o cgelqf.o \
cgels.o cgelsd.o cgelss.o cgelsx.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \
cgeqpf.o cgeqr2.o cgeqr2p.o cgeqrf.o cgeqrfp.o cgerfs.o \
cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesv.o cgesvd.o \
cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesvd.o \
cgesvx.o cgetc2.o cgetri.o \
cggbak.o cggbal.o cgges.o cggesx.o cggev.o cggevx.o cggglm.o \
cgghrd.o cgglse.o cggqrf.o cggrqf.o \
@@ -208,7 +208,7 @@ CLASRC = \
clarfx.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
clasyf.o clasyf_rook.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \
clatzm.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
clatzm.o clauu2.o clauum.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \
cposv.o cposvx.o cpotri.o cpstrf.o cpstf2.o \
cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
@@ -225,7 +225,7 @@ CLASRC = \
ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \
ctprfs.o ctptri.o \
ctptrs.o ctrcon.o ctrevc.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \
ctrsyl.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \
cungrq.o cungtr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o \
cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \
@@ -252,7 +252,7 @@ CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
cla_lin_berr.o clarscl2.o clascl2.o cla_wwaddw.o
endif

ZCLASRC = cpotrs.o
ZCLASRC = cpotrs.o

DLASRC = \
dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \
@@ -261,7 +261,7 @@ DLASRC = \
dgegs.o dgegv.o dgehd2.o dgehrd.o dgelq2.o dgelqf.o \
dgels.o dgelsd.o dgelss.o dgelsx.o dgelsy.o dgeql2.o dgeqlf.o \
dgeqp3.o dgeqpf.o dgeqr2.o dgeqr2p.o dgeqrf.o dgeqrfp.o dgerfs.o \
dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvx.o \
dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesvd.o dgesvx.o \
dgetc2.o dgetri.o \
dggbak.o dggbal.o dgges.o dggesx.o dggev.o dggevx.o \
dggglm.o dgghrd.o dgglse.o dggqrf.o \
@@ -279,8 +279,8 @@ DLASRC = \
dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o \
dlargv.o dlarrv.o dlartv.o \
dlarz.o dlarzb.o dlarzt.o dlasy2.o dlasyf.o dlasyf_rook.o \
dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o \
dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o dlauu2.o \
dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \
dorgrq.o dorgtr.o dorm2l.o dorm2r.o \
dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \
@@ -307,7 +307,7 @@ DLASRC = \
dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \
dtptrs.o \
dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \
dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
dtrti2.o dtrtri.o dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \
dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \
dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \
@@ -335,8 +335,8 @@ ZLASRC = \
zgegs.o zgegv.o zgehd2.o zgehrd.o zgelq2.o zgelqf.o \
zgels.o zgelsd.o zgelss.o zgelsx.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \
zgeqpf.o zgeqr2.o zgeqr2p.o zgeqrf.o zgeqrfp.o zgerfs.o zgerq2.o zgerqf.o \
zgesc2.o zgesdd.o zgesv.o zgesvd.o zgesvx.o zgetc2.o \
zgetri.o \
zgesc2.o zgesdd.o zgesvd.o zgesvx.o zgetc2.o \
zgetri.o \
zggbak.o zggbal.o zgges.o zggesx.o zggev.o zggevx.o zggglm.o \
zgghrd.o zgglse.o zggqrf.o zggrqf.o \
zggsvd.o zggsvp.o \
@@ -370,7 +370,7 @@ ZLASRC = \
zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
zlassq.o zlasyf.o zlasyf_rook.o \
zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o zlauu2.o \
zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
zlauum.o zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \
zposv.o zposvx.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \
zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
@@ -387,7 +387,7 @@ ZLASRC = \
ztgexc.o ztgsen.o ztgsja.o ztgsna.o ztgsy2.o ztgsyl.o ztpcon.o \
ztprfs.o ztptri.o \
ztptrs.o ztrcon.o ztrevc.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \
ztrsyl.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \
zungrq.o zungtr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o \
zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
@@ -417,8 +417,6 @@ endif
ALLOBJ = $(SLASRC) $(DLASRC) $(DSLASRC) $(CLASRC) $(ZLASRC) $(ZCLASRC) \
$(SCLAUX) $(DZLAUX) $(ALLAUX)

ALLOBJ_P = $(ALLOBJ:.o=.$(PSUFFIX))

ifdef USEXBLAS
ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
endif
@@ -435,6 +433,7 @@ lapacklib: $(ALLOBJ) $(ALLXOBJ)
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ_P)
$(RANLIB) $@


single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX)
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \
$(SXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX)
@@ -483,16 +482,11 @@ clean:
%.$(PSUFFIX): %.f
$(FORTRAN) $(POPTS) -c $< -o $@

slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@

slaruv.$(PSUFFIX): slaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
dlaruv.$(PSUFFIX): dlaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
sla_wwaddw.$(PSUFFIX): sla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
dla_wwaddw.$(PSUFFIX): dla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
cla_wwaddw.$(PSUFFIX): cla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
zla_wwaddw.$(PSUFFIX): zla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@

slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@


+ 1
- 1
lapack-netlib/TESTING/ctest_rfp.in View File

@@ -5,5 +5,5 @@ Data file for testing COMPLEX LAPACK linear equation routines RFP format
1 2 15 Values of NRHS (number of right hand sides)
9 Number of matrix types (list types on next line if 0 < NTYPES < 9)
1 2 3 4 5 6 7 8 9 Matrix Types
30.0 Threshold value of test ratio
50.0 Threshold value of test ratio
T Put T to test the error exits

+ 1
- 1
lapack-netlib/TESTING/svd.in View File

@@ -7,7 +7,7 @@ SVD: Data file for testing Singular Value Decomposition routines
2 2 2 2 2 Values of NBMIN (minimum blocksize)
1 0 5 9 1 Values of NX (crossover point)
2 0 2 2 2 Values of NRHS
50.0 Threshold value
54.0 Threshold value
T Put T to test the LAPACK routines
T Put T to test the driver routines
T Put T to test the error exits


+ 2
- 1
lapack/Makefile View File

@@ -1,7 +1,8 @@
TOPDIR = ..
include ../Makefile.system

SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs
#SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs
SUBDIRS = getrf getf2 laswp getrs potrf potf2

FLAMEDIRS = laswp getf2 potf2 lauu2 trti2



+ 3
- 3
make.inc View File

@@ -5,7 +5,7 @@ LOADER = $(FORTRAN)
TIMER = NONE
ARCHFLAGS= -ru
#RANLIB = ranlib
BLASLIB =
BLASLIB = ../../../libopenblas.a
TMGLIB = tmglib.a
EIGSRCLIB = eigsrc.a
LINSRCLIB = linsrc.a
#EIGSRCLIB = eigsrc.a
#LINSRCLIB = linsrc.a

+ 44
- 4
param.h View File

@@ -1032,14 +1032,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define XGEMM_DEFAULT_UNROLL_N 1
#else
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_M 4
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1

#define SGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_N 4
@@ -1104,10 +1104,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1
#else
#define SGEMM_DEFAULT_UNROLL_M 8
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 8
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 4
#define XGEMM_DEFAULT_UNROLL_M 1

@@ -2021,6 +2021,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif


/*
 * Build-time parameter set selected when ARMV5 is defined.
 * The S/D/C/Z prefixes presumably denote the single, double, complex and
 * double-complex precisions -- NOTE(review): confirm against the other
 * architecture sections of this header.
 */
#if defined(ARMV5)
#define SNUMOPT 2
#define DNUMOPT 2

/* Buffer offset and alignment-mask defaults shared by all GEMM variants. */
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL

/*
 * Unroll factors: every precision uses 2 for both M and N here.
 * NOTE(review): these presumably must match the register-block shape of the
 * GEMM kernels chosen for this target -- verify against the kernel file.
 */
#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 2

#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 2

#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2

#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2

/* Default P values, one per precision (presumably a blocking size -- TODO confirm). */
#define SGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_P 128
#define CGEMM_DEFAULT_P 96
#define ZGEMM_DEFAULT_P 64

/* Default Q values, one per precision (presumably a blocking size -- TODO confirm). */
#define SGEMM_DEFAULT_Q 240
#define DGEMM_DEFAULT_Q 120
#define CGEMM_DEFAULT_Q 120
#define ZGEMM_DEFAULT_Q 120

/* Default R values, one per precision (presumably a blocking size -- TODO confirm). */
#define SGEMM_DEFAULT_R 12288
#define DGEMM_DEFAULT_R 8192
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096


/* SYMV parameter for this target; exact meaning not visible here. */
#define SYMV_P 16
#endif




#ifdef GENERIC


Loading…
Cancel
Save