| @@ -79,5 +79,9 @@ In chronological order: | |||
| * [2013-07-11] create openblas_get_parallel to retrieve information which parallelization | |||
| model is used by OpenBLAS. | |||
| * Sébastien Fabbro <bicatali@gentoo.org> | |||
| * [2013-07-24] Modify makefile to respect user's LDFLAGS | |||
| * [2013-07-24] Add stack markings for GNU as arch-independent for assembler files | |||
| * [Your name or handle] <[email or website]> | |||
| * [Date] [Brief summary of your changes] | |||
| @@ -1,4 +1,16 @@ | |||
| OpenBLAS ChangeLog | |||
| ==================================================================== | |||
| Version 0.2.8 | |||
| 01-Aug-2013 | |||
| common: | |||
| * Support Open64 5.0. (#266) | |||
| * Add executable stack markings. (#262, Thanks Sébastien Fabbro) | |||
| * Respect user's LDFLAGS (Thanks Sébastien Fabbro) | |||
| x86/x86-64: | |||
| * Rollback bulldozer and piledriver kernels to barcelona kernels (#263) | |||
| We will fix the computational error bug in bulldozer and piledriver kernels. | |||
| ==================================================================== | |||
| Version 0.2.7 | |||
| 20-Jul-2013 | |||
| @@ -1,6 +1 @@ | |||
| COPT = -Wall -O2 # -DGEMMTEST | |||
| ifdef BINARY64 | |||
| else | |||
| # LDFLAGS = -m elf32ppc | |||
| LDFLAGS = -m elf_i386 | |||
| endif | |||
| @@ -17,13 +17,7 @@ endif | |||
| endif | |||
| ifdef BINARY64 | |||
| ifeq ($(OSNAME), Linux) | |||
| LDFLAGS = -m elf64ppc | |||
| endif | |||
| ifeq ($(OSNAME), Darwin) | |||
| LDFLAGS = -arch ppc64 | |||
| endif | |||
| ifeq ($(OSNAME), AIX) | |||
| CCOMMON_OPT += -mpowerpc64 -maix64 | |||
| @@ -34,17 +28,12 @@ ifeq ($(COMPILER_F77), xlf) | |||
| FCOMMON_OPT += -q64 | |||
| endif | |||
| ARFLAGS = -X 64 | |||
| LDFLAGS = -b64 | |||
| ASFLAGS = -a64 | |||
| endif | |||
| else | |||
| ifeq ($(OSNAME), Linux) | |||
| LDFLAGS = -m elf32ppc | |||
| endif | |||
| ifeq ($(OSNAME), AIX) | |||
| CCOMMON_OPT += -Wa,-a32 | |||
| ARFLAGS = -X 32 | |||
| LDFLAGS = -b32 | |||
| ASFLAGS = -a32 | |||
| endif | |||
| endif | |||
| @@ -3,7 +3,7 @@ | |||
| # | |||
| # This library's version | |||
| VERSION = 0.2.7 | |||
| VERSION = 0.2.8 | |||
| # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | |||
| # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | |||
| @@ -10,7 +10,6 @@ endif | |||
| ifeq ($(COMPILER_F77), f90) | |||
| FCOMMON_OPT += -xarch=v9 | |||
| endif | |||
| LDFLAGS = -64 | |||
| else | |||
| CCOMMON_OPT += -mcpu=v9 | |||
| @@ -324,14 +324,16 @@ ifeq ($(ARCH), x86) | |||
| DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ | |||
| CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||
| ifneq ($(NO_AVX), 1) | |||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||
| DYNAMIC_CORE += SANDYBRIDGE | |||
| #BULLDOZER PILEDRIVER | |||
| endif | |||
| endif | |||
| ifeq ($(ARCH), x86_64) | |||
| DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||
| ifneq ($(NO_AVX), 1) | |||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||
| DYNAMIC_CORE += SANDYBRIDGE | |||
| #BULLDOZER PILEDRIVER | |||
| endif | |||
| endif | |||
| @@ -895,6 +897,7 @@ export CC | |||
| export FC | |||
| export BU | |||
| export FU | |||
| export NEED2UNDERSCORES | |||
| export USE_THREAD | |||
| export NUM_THREADS | |||
| export NUM_CORES | |||
| @@ -1,8 +1,5 @@ | |||
| # COMPILER_PREFIX = mingw32- | |||
| ifeq ($(OSNAME), Linux) | |||
| LDFLAGS = -melf_i386 | |||
| endif | |||
| ifeq ($(OSNAME), Interix) | |||
| ARFLAGS = -m x86 | |||
| @@ -2,25 +2,12 @@ | |||
| ifeq ($(OSNAME), SunOS) | |||
| ifdef BINARY64 | |||
| LDFLAGS = -64 | |||
| ifeq ($(F_COMPILER), SUN) | |||
| FCOMMON_OPT += -m64 | |||
| endif | |||
| endif | |||
| endif | |||
| ifeq ($(OSNAME), FreeBSD) | |||
| LDFLAGS = -m elf_x86_64_fbsd | |||
| endif | |||
| ifeq ($(OSNAME), Linux) | |||
| LDFLAGS = -m elf_x86_64 | |||
| endif | |||
| ifeq ($(OSNAME), Darwin) | |||
| LDFLAGS = | |||
| endif | |||
| ifeq ($(OSNAME), Interix) | |||
| ARFLAGS = -m x64 | |||
| endif | |||
| @@ -150,9 +150,17 @@ REALNAME: | |||
| #define PROFCODE .prologue 0 | |||
| #endif | |||
| #if defined(__linux__) && defined(__ELF__) | |||
| #define GNUSTACK .section .note.GNU-stack,"",%progbits | |||
| #else | |||
| #define GNUSTACK | |||
| #endif | |||
| #define EPILOGUE \ | |||
| .end REALNAME; \ | |||
| .ident VERSION | |||
| .ident VERSION; \ | |||
| GNUSTACK | |||
| #endif | |||
| #ifdef DOUBLE | |||
| @@ -379,8 +379,15 @@ REALNAME: | |||
| #define PROFCODE | |||
| #endif | |||
| #if defined(__linux__) && defined(__ELF__) | |||
| #define GNUSTACK .section .note.GNU-stack,"",%progbits | |||
| #else | |||
| #define GNUSTACK | |||
| #endif | |||
| #define EPILOGUE \ | |||
| .endp REALNAME | |||
| .endp REALNAME ; \ | |||
| GNUSTACK | |||
| #define START_ADDRESS 0x20000fc800000000UL | |||
| @@ -235,10 +235,17 @@ REALNAME: ;\ | |||
| .set noreorder ;\ | |||
| .set nomacro | |||
| #if defined(__linux__) && defined(__ELF__) | |||
| #define GNUSTACK .section .note.GNU-stack,"",%progbits | |||
| #else | |||
| #define GNUSTACK | |||
| #endif | |||
| #define EPILOGUE \ | |||
| .set macro ;\ | |||
| .set reorder ;\ | |||
| .end REALNAME | |||
| .end REALNAME ;\ | |||
| GNUSTACK | |||
| #define PROFCODE | |||
| #endif | |||
| @@ -199,8 +199,17 @@ static __inline int blas_quickdivide(blasint x, blasint y){ | |||
| .type REALNAME, #function; \ | |||
| .proc 07; \ | |||
| REALNAME:; | |||
| #if defined(__linux__) && defined(__ELF__) | |||
| #define GNUSTACK .section .note.GNU-stack,"",%progbits | |||
| #else | |||
| #define GNUSTACK | |||
| #endif | |||
| #define EPILOGUE \ | |||
| .size REALNAME, .-REALNAME | |||
| .size REALNAME, .-REALNAME; \ | |||
| GNUSTACK | |||
| #endif | |||
| #endif | |||
| @@ -301,7 +301,9 @@ REALNAME: | |||
| #define PROFCODE | |||
| #endif | |||
| #define EPILOGUE .size REALNAME, .-REALNAME | |||
| #define EPILOGUE \ | |||
| .size REALNAME, .-REALNAME; \ | |||
| .section .note.GNU-stack,"",%progbits | |||
| #endif | |||
| @@ -372,7 +372,10 @@ REALNAME: | |||
| #define PROFCODE | |||
| #endif | |||
| #define EPILOGUE .size REALNAME, .-REALNAME | |||
| #define EPILOGUE \ | |||
| .size REALNAME, .-REALNAME; \ | |||
| .section .note.GNU-stack,"",%progbits | |||
| #endif | |||
| @@ -105,8 +105,8 @@ | |||
| #define CORE_NANO 19 | |||
| #define CORE_SANDYBRIDGE 20 | |||
| #define CORE_BOBCAT 21 | |||
| #define CORE_BULLDOZER 22 | |||
| #define CORE_PILEDRIVER 23 | |||
| #define CORE_BULLDOZER CORE_BARCELONA | |||
| #define CORE_PILEDRIVER CORE_BARCELONA | |||
| #define CORE_HASWELL CORE_SANDYBRIDGE | |||
| #define HAVE_SSE (1 << 0) | |||
| @@ -198,8 +198,8 @@ typedef struct { | |||
| #define CPUTYPE_NANO 43 | |||
| #define CPUTYPE_SANDYBRIDGE 44 | |||
| #define CPUTYPE_BOBCAT 45 | |||
| #define CPUTYPE_BULLDOZER 46 | |||
| #define CPUTYPE_PILEDRIVER 47 | |||
| #define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA | |||
| #define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA | |||
| // this define is because BLAS doesn't have haswell specific optimizations yet | |||
| #define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE | |||
| @@ -229,8 +229,8 @@ int get_cputype(int gettype){ | |||
| if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2; | |||
| #ifndef NO_AVX | |||
| if (support_avx()) feature |= HAVE_AVX; | |||
| if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3; | |||
| #endif | |||
| if ((ecx & (1 << 20)) != 0) feature |= HAVE_FMA3; | |||
| if (have_excpuid() >= 0x01) { | |||
| cpuid(0x80000001, &eax, &ebx, &ecx, &edx); | |||
| @@ -77,7 +77,7 @@ endif | |||
| clean :: | |||
| rm -f x* | |||
| FLDFLAGS = $(FFLAGS:-fPIC=) | |||
| FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS) | |||
| CEXTRALIB = | |||
| # Single real | |||
| @@ -63,14 +63,16 @@ extern gotoblas_t gotoblas_BARCELONA; | |||
| extern gotoblas_t gotoblas_BOBCAT; | |||
| #ifndef NO_AVX | |||
| extern gotoblas_t gotoblas_SANDYBRIDGE; | |||
| extern gotoblas_t gotoblas_BULLDOZER; | |||
| extern gotoblas_t gotoblas_PILEDRIVER; | |||
| //extern gotoblas_t gotoblas_BULLDOZER; | |||
| //extern gotoblas_t gotoblas_PILEDRIVER; | |||
| #else | |||
| //Use NEHALEM kernels for sandy bridge | |||
| #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | |||
| #endif | |||
| #define gotoblas_BULLDOZER gotoblas_BARCELONA | |||
| #define gotoblas_PILEDRIVER gotoblas_BARCELONA | |||
| #endif | |||
| //Use sandy bridge kernels for haswell. | |||
| #define gotoblas_HASWELL gotoblas_SANDYBRIDGE | |||
| @@ -83,6 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include <dirent.h> | |||
| #include <dlfcn.h> | |||
| #include <unistd.h> | |||
| #include <string.h> | |||
| #define MAX_NODES 16 | |||
| #define MAX_CPUS 256 | |||
| @@ -315,7 +316,7 @@ static int numa_check(void) { | |||
| } | |||
| while ((dir = readdir(dp)) != NULL) { | |||
| if (*(unsigned int *) dir -> d_name == 0x065646f6eU) { | |||
| if (strncmp(dir->d_name, "node", 4)==0) { | |||
| node = atoi(&dir -> d_name[4]); | |||
| @@ -18,6 +18,10 @@ ifndef NO_LAPACKE | |||
| NO_LAPACKE = 0 | |||
| endif | |||
| ifndef NEED2UNDERSCORES | |||
| NEED2UNDERSCORES=0 | |||
| endif | |||
| ifeq ($(OSNAME), WINNT) | |||
| ifeq ($(F_COMPILER), GFORTRAN) | |||
| EXTRALIB += -lgfortran | |||
| @@ -89,18 +93,18 @@ else | |||
| endif | |||
| libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def | |||
| $(CC) $(CFLAGS) libgoto2_shared.def -shared -o $(@F) \ | |||
| $(CC) $(CFLAGS) $(LDFLAGS) libgoto2_shared.def -shared -o $(@F) \ | |||
| -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ | |||
| -Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB) | |||
| libopenblas.def : gensymbol | |||
| perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) | |||
| perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) | |||
| libgoto2_shared.def : gensymbol | |||
| perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) | |||
| perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) | |||
| libgoto_hpl.def : gensymbol | |||
| perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) | |||
| perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) | |||
| $(LIBDYNNAME) : ../$(LIBNAME) osx.def | |||
| $(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | |||
| @@ -116,14 +120,14 @@ ifeq ($(OSNAME), Linux) | |||
| so : ../$(LIBSONAME) | |||
| ../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c | |||
| $(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \ | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ | |||
| -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ | |||
| -Wl,--retain-symbols-file=linux.def -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB) | |||
| ifneq ($(C_COMPILER), LSB) | |||
| $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | |||
| else | |||
| #Use FC on LSB | |||
| $(FC) $(FFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | |||
| $(FC) $(FFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | |||
| endif | |||
| rm -f linktest | |||
| @@ -135,10 +139,10 @@ ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD)) | |||
| so : ../$(LIBSONAME) | |||
| ../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c | |||
| $(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \ | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ | |||
| -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ | |||
| -Wl,--retain-symbols-file=linux.def $(FEXTRALIB) $(EXTRALIB) | |||
| $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | |||
| rm -f linktest | |||
| endif | |||
| @@ -148,15 +152,15 @@ ifeq ($(OSNAME), OSF1) | |||
| so : ../$(LIBSONAME) | |||
| ../$(LIBSONAME) : | |||
| $(CC) -shared -o ../$(LIBSONAME) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) ../$(LIBNAME) | |||
| endif | |||
| ifeq ($(OSNAME), SunOS) | |||
| so : ../$(LIBSONAME) | |||
| $(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \ | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ | |||
| -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(EXTRALIB) | |||
| $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | |||
| rm -f linktest | |||
| endif | |||
| @@ -187,23 +191,23 @@ static : ../$(LIBNAME) | |||
| rm -f goto.$(SUFFIX) | |||
| linux.def : gensymbol ../Makefile.system ../getarch.c | |||
| perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) | |||
| perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) | |||
| osx.def : gensymbol ../Makefile.system ../getarch.c | |||
| perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) | |||
| perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) | |||
| aix.def : gensymbol ../Makefile.system ../getarch.c | |||
| perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) | |||
| perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) | |||
| symbol.S : gensymbol | |||
| perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > symbol.S | |||
| perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > symbol.S | |||
| test : linktest.c | |||
| $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. | |||
| rm -f linktest | |||
| linktest.c : gensymbol ../Makefile.system ../getarch.c | |||
| perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > linktest.c | |||
| perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > linktest.c | |||
| clean :: | |||
| @rm -f *.def *.dylib __.SYMDEF* | |||
| @@ -114,8 +114,8 @@ | |||
| # ALLAUX -- Auxiliary routines called from all precisions | |||
| # already provided by @blasobjs: xerbla, lsame | |||
| ilaenv, ieeeck, lsamen, xerbla_array, iparmq, | |||
| ilaprec, ilatrans, ilauplo, iladiag, chla_transtype, | |||
| ilaenv, ieeeck, lsamen, iparmq, | |||
| ilaprec, ilatrans, ilauplo, iladiag, | |||
| ilaver, slamch, slamc3, | |||
| # SCLAUX -- Auxiliary routines called from both REAL and COMPLEX. | |||
| @@ -2672,12 +2672,25 @@ | |||
| #LAPACKE_zlagsy_work, | |||
| ); | |||
| #These functions may need 2 underscores. | |||
| @lapack_embeded_underscore_objs=(xerbla_array, chla_transtype,); | |||
| if ($ARGV[5] == 1) { | |||
| #NO_LAPACK=1 | |||
| @underscore_objs = (@blasobjs, @misc_underscore_objs); | |||
| } elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" || | |||
| -d "../lapack-3.4.2" || -d "../lapack-netlib") { | |||
| @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs); | |||
| if ($ARGV[7] == 0){ | |||
| # NEED2UNDERSCORES=0 | |||
| # Don't need 2 underscores | |||
| @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs, @lapack_embeded_underscore_objs); | |||
| }else{ | |||
| # Need 2 underscores | |||
| @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs); | |||
| @need_2underscore_objs = (@lapack_embeded_underscore_objs); | |||
| }; | |||
| } else { | |||
| @underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs); | |||
| } | |||
| @@ -2729,6 +2742,10 @@ if ($ARGV[0] eq "linux"){ | |||
| print $objs, $bu, "\n"; | |||
| } | |||
| foreach $objs (@need_2underscore_objs) { | |||
| print $objs, $bu, $bu, "\n"; | |||
| } | |||
| # if ($ARGV[4] == 0) { | |||
| foreach $objs (@no_underscore_objs) { | |||
| print $objs, "\n"; | |||
| @@ -2750,6 +2767,10 @@ if ($ARGV[0] eq "osx"){ | |||
| print "_", $objs, $bu, "\n"; | |||
| } | |||
| foreach $objs (@need_2underscore_objs) { | |||
| print "_", $objs, $bu, $bu, "\n"; | |||
| } | |||
| # if ($ARGV[4] == 0) { | |||
| foreach $objs (@no_underscore_objs) { | |||
| print "_", $objs, "\n"; | |||
| @@ -2767,6 +2788,10 @@ if ($ARGV[0] eq "aix"){ | |||
| print $objs, $bu, "\n"; | |||
| } | |||
| foreach $objs (@need_2underscore_objs) { | |||
| print $objs, $bu, $bu, "\n"; | |||
| } | |||
| # if ($ARGV[4] == 0) { | |||
| foreach $objs (@no_underscore_objs) { | |||
| print $objs, "\n"; | |||
| @@ -2791,6 +2816,17 @@ if ($ARGV[0] eq "win2k"){ | |||
| print "\t$uppercase=$objs", "_ \@", $count, "\n"; | |||
| $count ++; | |||
| } | |||
| foreach $objs (@need_2underscore_objs) { | |||
| $uppercase = $objs; | |||
| $uppercase =~ tr/[a-z]/[A-Z]/; | |||
| print "\t$objs=$objs","__ \@", $count, "\n"; | |||
| $count ++; | |||
| print "\t",$objs, "__=$objs","__ \@", $count, "\n"; | |||
| $count ++; | |||
| print "\t$uppercase=$objs", "__ \@", $count, "\n"; | |||
| $count ++; | |||
| } | |||
| #for misc_common_objs | |||
| foreach $objs (@misc_common_objs) { | |||
| @@ -2852,6 +2888,18 @@ if ($ARGV[0] eq "microsoft"){ | |||
| print "\t$uppercase\_ = $objs","_\n"; | |||
| $count ++; | |||
| } | |||
| foreach $objs (@need_2underscore_objs) { | |||
| $uppercase = $objs; | |||
| $uppercase =~ tr/[a-z]/[A-Z]/; | |||
| print "\t$objs=$objs","__ \@", $count, "\n"; | |||
| $count ++; | |||
| print "\t",$objs, "__=$objs","__ \@", $count, "\n"; | |||
| $count ++; | |||
| print "\t$uppercase=$objs", "__ \@", $count, "\n"; | |||
| $count ++; | |||
| } | |||
| exit(0); | |||
| } | |||
| @@ -2868,6 +2916,16 @@ if ($ARGV[0] eq "win2kasm"){ | |||
| print "_", $uppercase, "_:\n"; | |||
| print "\tjmp\t_", $objs, "_\n"; | |||
| } | |||
| foreach $objs (@need_2underscore_objs) { | |||
| $uppercase = $objs; | |||
| $uppercase =~ tr/[a-z]/[A-Z]/; | |||
| print "\t.align 16\n"; | |||
| print "\t.globl _", $uppercase, "__\n"; | |||
| print "_", $uppercase, "__:\n"; | |||
| print "\tjmp\t_", $objs, "__\n"; | |||
| } | |||
| exit(0); | |||
| } | |||
| @@ -2880,6 +2938,11 @@ if ($ARGV[0] eq "linktest"){ | |||
| foreach $objs (@underscore_objs) { | |||
| print $objs, $bu, "();\n" if $objs ne "xerbla"; | |||
| } | |||
| foreach $objs (@need_2underscore_objs) { | |||
| print $objs, $bu, $bu, "();\n"; | |||
| } | |||
| # if ($ARGV[4] == 0) { | |||
| foreach $objs (@no_underscore_objs) { | |||
| print $objs, "();\n"; | |||
| @@ -114,6 +114,12 @@ if ($compiler eq "") { | |||
| $vendor = IBM; | |||
| $openmp = "-openmp"; | |||
| } | |||
| # For names with an embedded underscore, e.g. zho_ge, the compiler may append 2 underscores. | |||
| $data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`; | |||
| if ($data =~ /zho_ge__/) { | |||
| $need2bu = 1; | |||
| } | |||
| } | |||
| if ($vendor eq "") { | |||
| @@ -245,6 +251,8 @@ if ($link ne "") { | |||
| $link =~ s/\-rpath\s+/\-rpath\@/g; | |||
| $link =~ s/\-rpath-link\s+/\-rpath-link\@/g; | |||
| @flags = split(/[\s\,\n]/, $link); | |||
| # remove leading and trailing quotes from each flag. | |||
| @flags = map {s/^['"]|['"]$//g; $_} @flags; | |||
| @@ -265,7 +273,15 @@ if ($link ne "") { | |||
| $linker_L .= "-Wl,". $flags . " "; | |||
| } | |||
| if ($flags =~ /^\-rpath/) { | |||
| if ($flags =~ /^\-rpath\@/) { | |||
| $flags =~ s/\@/\,/g; | |||
| if ($vendor eq "PGI") { | |||
| $flags =~ s/lib$/libso/; | |||
| } | |||
| $linker_L .= "-Wl,". $flags . " " ; | |||
| } | |||
| if ($flags =~ /^\-rpath-link\@/) { | |||
| $flags =~ s/\@/\,/g; | |||
| if ($vendor eq "PGI") { | |||
| $flags =~ s/lib$/libso/; | |||
| @@ -309,6 +325,9 @@ print MAKEFILE "NOFORTRAN=1\n" if $nofortran == 1; | |||
| print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne ""; | |||
| print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne ""; | |||
| print CONFFILE "#define NEED2UNDERSCORES\t1\n" if $need2bu ne ""; | |||
| print MAKEFILE "NEED2UNDERSCORES=1\n" if $need2bu ne ""; | |||
| if (($linker_l ne "") || ($linker_a ne "")) { | |||
| print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n"; | |||
| @@ -0,0 +1,6 @@ | |||
| double complex function zho_ge() | |||
| zho_ge = (0.0d0,0.0d0) | |||
| return | |||
| end | |||
| @@ -354,7 +354,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CORENAME "OPTERON" | |||
| #endif | |||
| #if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) | |||
| #if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_PILEDRIVER) || defined (FORCE_BULLDOZER) | |||
| #define FORCE | |||
| #define FORCE_INTEL | |||
| #define ARCHITECTURE "X86" | |||
| @@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CORENAME "BOBCAT" | |||
| #endif | |||
| #if defined (FORCE_BULLDOZER) | |||
| #if 0 | |||
| #define FORCE | |||
| #define FORCE_INTEL | |||
| #define ARCHITECTURE "X86" | |||
| @@ -400,7 +400,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CORENAME "BULLDOZER" | |||
| #endif | |||
| #if defined (FORCE_PILEDRIVER) | |||
| #if 0 | |||
| #define FORCE | |||
| #define FORCE_INTEL | |||
| #define ARCHITECTURE "X86" | |||
| @@ -8,7 +8,7 @@ | |||
| int main(int argc, char **argv) { | |||
| if ( (argc <= 1) || (argc >= 2) && (*argv[1] == '0')) { | |||
| if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) { | |||
| printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M); | |||
| printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N); | |||
| printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M); | |||
| @@ -60,7 +60,7 @@ static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT * | |||
| }; | |||
| #endif | |||
| extern void dtrtri_lapack_(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info); | |||
| extern void BLASFUNC(dtrtrilapack)(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info); | |||
| int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ | |||
| @@ -137,7 +137,10 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In | |||
| // double trtri_U single thread error | |||
| // call dtrtri from LAPACK as a workaround. | |||
| if(uplo==0){ | |||
| dtrtri_lapack_(UPLO, DIAG, N, a, ldA, Info); | |||
| BLASFUNC(dtrtrilapack)(UPLO, DIAG, N, a, ldA, Info); | |||
| #ifndef PPC440 | |||
| blas_memory_free(buffer); | |||
| #endif | |||
| return 0; | |||
| } | |||
| #endif | |||
| @@ -107,7 +107,7 @@ | |||
| *> \ingroup doubleOTHERcomputational | |||
| * | |||
| * ===================================================================== | |||
| SUBROUTINE DTRTRI_LAPACK( UPLO, DIAG, N, A, LDA, INFO ) | |||
| SUBROUTINE DTRTRILAPACK( UPLO, DIAG, N, A, LDA, INFO ) | |||
| * | |||
| * -- LAPACK computational routine (version 3.4.0) -- | |||
| * -- LAPACK is a software package provided by Univ. of Tennessee, -- | |||
| @@ -88,7 +88,7 @@ else | |||
| endif | |||
| endif | |||
| FLDFLAGS = $(FFLAGS:-fPIC=) | |||
| FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS) | |||
| CEXTRALIB = | |||