Browse Source

Merge pull request #411 from wernsaar/develop

Lapack-test on x86 32bit now runs without errors.
tags/v0.2.10
Zhang Xianyi 11 years ago
parent
commit
7961404a40
3 changed files with 58 additions and 297 deletions
  1. +19
    -18
      interface/Makefile
  2. +35
    -275
      kernel/x86/KERNEL
  3. +4
    -4
      lapack-netlib/SRC/Makefile

+ 19
- 18
interface/Makefile View File

@@ -356,25 +356,25 @@ ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)

#SLAPACKOBJS = \
# sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
# spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
# slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \
# sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
# spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \
# slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX)

SLAPACKOBJS = \
sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \
slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX)
slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX)


#DLAPACKOBJS = \
# dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
# dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
# dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \
# dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \
# dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \
# dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX)

DLAPACKOBJS = \
dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \
dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \
dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX)
dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX)


QLAPACKOBJS = \
@@ -382,28 +382,29 @@ QLAPACKOBJS = \
qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \
qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \


#CLAPACKOBJS = \
# cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
# cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
# claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \
# cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
# cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
# clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX)


CLAPACKOBJS = \
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX)
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX)


#ZLAPACKOBJS = \
# zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
# zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
# zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \
# zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
# zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \
# zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX)


ZLAPACKOBJS = \
zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \
zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX)


zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX)


XLAPACKOBJS = \


+ 35
- 275
kernel/x86/KERNEL View File

@@ -1,263 +1,3 @@
GEMVDEP = ../l2param.h

ifdef HAVE_SSE

ifndef SAMAXKERNEL
SAMAXKERNEL = amax_sse.S
endif

ifndef CAMAXKERNEL
CAMAXKERNEL = zamax_sse.S
endif

ifndef SAMINKERNEL
SAMINKERNEL = amax_sse.S
endif

ifndef CAMINKERNEL
CAMINKERNEL = zamax_sse.S
endif

ifndef ISAMAXKERNEL
ISAMAXKERNEL = iamax_sse.S
endif

ifndef ICAMAXKERNEL
ICAMAXKERNEL = izamax_sse.S
endif

ifndef ISAMINKERNEL
ISAMINKERNEL = iamax_sse.S
endif

ifndef ICAMINKERNEL
ICAMINKERNEL = izamax_sse.S
endif

ifndef ISMAXKERNEL
ISMAXKERNEL = iamax_sse.S
endif

ifndef ISMINKERNEL
ISMINKERNEL = iamax_sse.S
endif

ifndef SMAXKERNEL
SMAXKERNEL = amax_sse.S
endif

ifndef SMINKERNEL
SMINKERNEL = amax_sse.S
endif

ifndef SASUMKERNEL
SASUMKERNEL = asum_sse.S
endif

ifndef CASUMKERNEL
CASUMKERNEL = zasum_sse.S
endif

ifndef SDOTKERNEL
SDOTKERNEL = ../arm/dot.c
endif

ifndef CDOTKERNEL
CDOTKERNEL = zdot_sse.S
endif

ifndef SCOPYKERNEL
SCOPYKERNEL = copy_sse.S
endif

ifndef CCOPYKERNEL
CCOPYKERNEL = zcopy_sse.S
endif

# Default SSCALKERNEL to scal_sse.S only when nothing assigned it earlier.
# The guard has to test the same variable it assigns; the previous spelling
# (SSACALKERNEL) never matched, so an earlier choice was always overwritten.
ifndef SSCALKERNEL
SSCALKERNEL = scal_sse.S
endif

# Same rule for CSCALKERNEL (guard previously misspelled as CSACALKERNEL).
ifndef CSCALKERNEL
CSCALKERNEL = zscal_sse.S
endif

ifndef SAXPYKERNEL
SAXPYKERNEL = axpy_sse.S
endif

ifndef CAXPYKERNEL
CAXPYKERNEL = zaxpy_sse.S
endif

ifndef SROTKERNEL
SROTKERNEL = rot_sse.S
endif

ifndef CROTKERNEL
CROTKERNEL = zrot_sse.S
endif

ifndef SSWAPKERNEL
SSWAPKERNEL = swap_sse.S
endif

ifndef CSWAPKERNEL
CSWAPKERNEL = zswap_sse.S
endif

ifndef SGEMVNKERNEL
SGEMVNKERNEL = ../arm/gemv_n.c
endif

ifndef SGEMVTKERNEL
SGEMVTKERNEL = ../arm/gemv_t.c
endif

ifndef CGEMVNKERNEL
CGEMVNKERNEL = zgemv_n_sse.S
endif

ifndef CGEMVTKERNEL
CGEMVTKERNEL = zgemv_t_sse.S
endif

endif


ifdef HAVE_SSE2

ifndef DAMAXKERNEL
DAMAXKERNEL = amax_sse2.S
endif

ifndef ZAMAXKERNEL
ZAMAXKERNEL = zamax_sse2.S
endif

ifndef DAMINKERNEL
DAMINKERNEL = amax_sse2.S
endif

ifndef ZAMINKERNEL
ZAMINKERNEL = zamax_sse2.S
endif

ifndef IDAMAXKERNEL
IDAMAXKERNEL = iamax_sse2.S
endif

ifndef IZAMAXKERNEL
IZAMAXKERNEL = izamax_sse2.S
endif

ifndef IDAMINKERNEL
IDAMINKERNEL = iamax_sse2.S
endif

ifndef IZAMINKERNEL
IZAMINKERNEL = izamax_sse2.S
endif

ifndef IDMAXKERNEL
IDMAXKERNEL = iamax_sse2.S
endif

ifndef IDMINKERNEL
IDMINKERNEL = iamax_sse2.S
endif

ifndef DMAXKERNEL
DMAXKERNEL = amax_sse2.S
endif

ifndef DMINKERNEL
DMINKERNEL = amax_sse2.S
endif

ifndef DDOTKERNEL
DDOTKERNEL = dot_sse2.S
endif

ifndef ZDOTKERNEL
ZDOTKERNEL = zdot_sse2.S
endif

ifndef DCOPYKERNEL
# The assignment below is commented out, so this guard currently sets nothing;
# DCOPYKERNEL keeps whatever value (if any) it is given elsewhere.
# DCOPYKERNEL = copy_sse2.S
endif

ifndef ZCOPYKERNEL
ZCOPYKERNEL = zcopy_sse2.S
endif

# Default DSCALKERNEL to scal_sse2.S only when nothing assigned it earlier.
# The guard has to test the same variable it assigns; the previous spelling
# (DSACALKERNEL) never matched, so an earlier choice was always overwritten.
ifndef DSCALKERNEL
DSCALKERNEL = scal_sse2.S
endif

# Same rule for ZSCALKERNEL (guard previously misspelled as ZSACALKERNEL).
ifndef ZSCALKERNEL
ZSCALKERNEL = zscal_sse2.S
endif

ifndef DASUMKERNEL
DASUMKERNEL = asum_sse2.S
endif

ifndef ZASUMKERNEL
ZASUMKERNEL = zasum_sse2.S
endif

ifndef DAXPYKERNEL
DAXPYKERNEL = axpy_sse2.S
endif

ifndef ZAXPYKERNEL
ZAXPYKERNEL = zaxpy_sse2.S
endif

# NOTE(review): these two guards pick *_sse.S files even though they sit in
# the HAVE_SSE2 section rather than the HAVE_SSE one -- presumably deliberate;
# confirm before moving them.
ifndef SNRM2KERNEL
SNRM2KERNEL = nrm2_sse.S
endif

ifndef CNRM2KERNEL
CNRM2KERNEL = znrm2_sse.S
endif

ifndef DROTKERNEL
DROTKERNEL = rot_sse2.S
endif

ifndef ZROTKERNEL
ZROTKERNEL = zrot_sse2.S
endif

ifndef DSWAPKERNEL
DSWAPKERNEL = swap_sse2.S
endif

ifndef ZSWAPKERNEL
ZSWAPKERNEL = zswap_sse2.S
endif

ifndef DGEMVNKERNEL
DGEMVNKERNEL = gemv_n_sse2.S
endif

ifndef DGEMVTKERNEL
DGEMVTKERNEL = gemv_t_sse2.S
endif

ifndef ZGEMVNKERNEL
ZGEMVNKERNEL = zgemv_n_sse2.S
endif

ifndef ZGEMVTKERNEL
ZGEMVTKERNEL = zgemv_t_sse2.S
endif

endif


ifndef SAMINKERNEL
SAMINKERNEL = amax.S
endif
@@ -394,21 +134,41 @@ XGEMMITCOPYOBJ =
XGEMMONCOPYOBJ = xgemm_oncopy$(TSUFFIX).$(SUFFIX)
XGEMMOTCOPYOBJ = xgemm_otcopy$(TSUFFIX).$(SUFFIX)

SGEMM_BETA = gemm_beta.S
DGEMM_BETA = gemm_beta.S
QGEMM_BETA = ../generic/gemm_beta.c
CGEMM_BETA = zgemm_beta.S
ZGEMM_BETA = zgemm_beta.S
XGEMM_BETA = ../generic/zgemm_beta.c

QTRSMKERNEL_LN = qtrsm_kernel_LN_2x2.S
QTRSMKERNEL_LT = qtrsm_kernel_LT_2x2.S
QTRSMKERNEL_RN = qtrsm_kernel_LT_2x2.S
QTRSMKERNEL_RT = qtrsm_kernel_RT_2x2.S
QTRSMKERNEL_LN = qtrsm_kernel_LN_2x2.S
QTRSMKERNEL_LT = qtrsm_kernel_LT_2x2.S
QTRSMKERNEL_RN = qtrsm_kernel_LT_2x2.S
QTRSMKERNEL_RT = qtrsm_kernel_RT_2x2.S

XTRSMKERNEL_LN = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_LT = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_LN = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_LT = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S
XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S

XGEMM3MKERNEL = xgemm3m_kernel_2x2.S


# bug in zdot assembler kernel
# Guarded fallback: the C source is used only if ZDOTKERNEL is still unset
# when this point is reached.
ifndef ZDOTKERNEL
ZDOTKERNEL = ../arm/zdot.c
endif

# Assigned unconditionally (no ifndef guard), unlike the fallbacks around it.
DSDOTKERNEL = ../arm/dot.c

# Bug in znrm2 assembler kernel
# Guarded fallback to the C source, same pattern as ZDOTKERNEL above.
ifndef ZNRM2KERNEL
ZNRM2KERNEL = ../arm/znrm2.c
endif

# Bug in zgemv_t assembler kernel
# Guarded fallback to the C source, same pattern as ZDOTKERNEL above.
ifndef ZGEMVTKERNEL
ZGEMVTKERNEL = ../arm/zgemv_t.c
endif

# GEMM beta kernels: all six variables point at C sources under ../generic.
# These are plain assignments without guards, so they replace any value
# assigned earlier in the makefile.
SGEMM_BETA = ../generic/gemm_beta.c
DGEMM_BETA = ../generic/gemm_beta.c
CGEMM_BETA = ../generic/zgemm_beta.c
ZGEMM_BETA = ../generic/zgemm_beta.c

QGEMM_BETA = ../generic/gemm_beta.c
XGEMM_BETA = ../generic/zgemm_beta.c


+ 4
- 4
lapack-netlib/SRC/Makefile View File

@@ -155,7 +155,7 @@ SLASRC = \
sbbcsd.o slapmr.o sorbdb.o sorbdb1.o sorbdb2.o sorbdb3.o sorbdb4.o \
sorbdb5.o sorbdb6.o sorcsd.o sorcsd2by1.o \
sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \
stpqrt.o stpqrt2.o stpmqrt.o stprfb.o
stpqrt.o stpqrt2.o stpmqrt.o stprfb.o spotri.o

DSLASRC = spotrs.o

@@ -236,7 +236,7 @@ CLASRC = \
cbbcsd.o clapmr.o cunbdb.o cunbdb1.o cunbdb2.o cunbdb3.o cunbdb4.o \
cunbdb5.o cunbdb6.o cuncsd.o cuncsd2by1.o \
cgeqrt.o cgeqrt2.o cgeqrt3.o cgemqrt.o \
ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o
ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o cpotri.o

ifdef USEXBLAS
CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
@@ -316,7 +316,7 @@ DLASRC = \
dbbcsd.o dlapmr.o dorbdb.o dorbdb1.o dorbdb2.o dorbdb3.o dorbdb4.o \
dorbdb5.o dorbdb6.o dorcsd.o dorcsd2by1.o \
dgeqrt.o dgeqrt2.o dgeqrt3.o dgemqrt.o \
dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o
dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o dpotri.o

ifdef USEXBLAS
DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \
@@ -400,7 +400,7 @@ ZLASRC = \
zbbcsd.o zlapmr.o zunbdb.o zunbdb1.o zunbdb2.o zunbdb3.o zunbdb4.o \
zunbdb5.o zunbdb6.o zuncsd.o zuncsd2by1.o \
zgeqrt.o zgeqrt2.o zgeqrt3.o zgemqrt.o \
ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o
ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o zpotri.o

ifdef USEXBLAS
ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \


Loading…
Cancel
Save