| @@ -8,6 +8,8 @@ lapack-3.1.1 | |||
| lapack-3.1.1.tgz | |||
| lapack-3.4.1 | |||
| lapack-3.4.1.tgz | |||
| lapack-3.4.2 | |||
| lapack-3.4.2.tgz | |||
| *.so | |||
| *.a | |||
| .svn | |||
| @@ -1,4 +1,20 @@ | |||
| OpenBLAS ChangeLog | |||
| ==================================================================== | |||
| Version 0.2.5 | |||
| 26-Nov-2012 | |||
| common: | |||
| * Added NO_SHARED flag to disable generating the shared library. | |||
| * Compile LAPACKE with ILP64 model when INTERFACE64=1 (#158) | |||
| * Export LAPACK 3.4.2 symbols in shared library. (#147) | |||
| * Only detect the number of physical CPU cores on Mac OSX. (#157) | |||
| * Fixed NetBSD build. (#155) | |||
| * Fixed compilation with TARGET=GENERIC. (#160) | |||
| x86/x86-64: | |||
| * Restore the original CPU affinity when calling | |||
| openblas_set_num_threads(1) (#153) | |||
| * Fixed a SEGFAULT bug in dgemv_t when m is very large. (#154) | |||
| MIPS64: | |||
| ==================================================================== | |||
| Version 0.2.4 | |||
| 8-Oct-2012 | |||
| @@ -80,6 +80,7 @@ endif | |||
| @echo | |||
| shared : | |||
| ifndef NO_SHARED | |||
| ifeq ($(OSNAME), Linux) | |||
| $(MAKE) -C exports so | |||
| -ln -fs $(LIBSONAME) $(LIBPREFIX).so | |||
| @@ -103,6 +104,7 @@ endif | |||
| ifeq ($(OSNAME), CYGWIN_NT) | |||
| $(MAKE) -C exports dll | |||
| endif | |||
| endif | |||
| tests : | |||
| ifndef NOFORTRAN | |||
| @@ -222,7 +224,11 @@ ifndef NOFORTRAN | |||
| -@echo "PNOOPT = $(FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| ifdef INTERFACE64 | |||
| -@echo "CFLAGS = $(CFLAGS) -DHAVE_LAPACK_CONFIG_H -DLAPACK_ILP64" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| else | |||
| -@echo "CFLAGS = $(CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| endif | |||
| -@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
| @@ -71,11 +71,9 @@ ifeq ($(OSNAME), Darwin) | |||
| endif | |||
| ifeq ($(OSNAME), WINNT) | |||
| -cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR) | |||
| -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dll | |||
| endif | |||
| ifeq ($(OSNAME), CYGWIN_NT) | |||
| -cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR) | |||
| -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dll | |||
| endif | |||
| @echo Install OK! | |||
| @@ -3,7 +3,7 @@ | |||
| # | |||
| # This library's version | |||
| VERSION = 0.2.4 | |||
| VERSION = 0.2.5 | |||
| # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | |||
| # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | |||
| @@ -24,10 +24,13 @@ VERSION = 0.2.4 | |||
| # Fortran compiler. Default is g77. | |||
| # FC = gfortran | |||
| # Even you can specify cross compiler | |||
| # You can even specify a cross compiler. In that case, please also set HOSTCC. | |||
| # CC = x86_64-w64-mingw32-gcc | |||
| # FC = x86_64-w64-mingw32-gfortran | |||
| # If you use a cross compiler, please set the host compiler here. | |||
| # HOSTCC = gcc | |||
| # If you need 32bit binary, define BINARY=32, otherwise define BINARY=64 | |||
| # BINARY=64 | |||
| @@ -45,6 +48,9 @@ VERSION = 0.2.4 | |||
| # automatically detected by the script. | |||
| # NUM_THREADS = 24 | |||
| # If you don't need to generate the shared library, please comment it in. | |||
| # NO_SHARED = 1 | |||
| # If you don't need CBLAS interface, please comment it in. | |||
| # NO_CBLAS = 1 | |||
| @@ -129,7 +129,7 @@ MD5SUM = md5 -r | |||
| endif | |||
| ifeq ($(OSNAME), NetBSD) | |||
| MD5SUM = md5 -r | |||
| MD5SUM = md5 -n | |||
| endif | |||
| ifeq ($(OSNAME), Linux) | |||
| @@ -351,7 +351,12 @@ typedef int blasint; | |||
| #endif | |||
| #define MMAP_ACCESS (PROT_READ | PROT_WRITE) | |||
| #ifdef __NetBSD__ | |||
| #define MMAP_POLICY (MAP_PRIVATE | MAP_ANON) | |||
| #else | |||
| #define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS) | |||
| #endif | |||
| #include "param.h" | |||
| #include "common_param.h" | |||
| @@ -770,6 +770,19 @@ void goto_set_num_threads(int num_threads) { | |||
| if (num_threads < 1) num_threads = blas_num_threads; | |||
| #ifndef NO_AFFINITY | |||
| if (num_threads == 1) { | |||
| if (blas_cpu_number == 1){ | |||
| //OpenBLAS is already single thread. | |||
| return; | |||
| }else{ | |||
| //From multi-threads to single thread | |||
| //Restore the original affinity mask | |||
| gotoblas_set_affinity(-1); | |||
| } | |||
| } | |||
| #endif | |||
| if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER; | |||
| if (num_threads > blas_num_threads) { | |||
| @@ -800,6 +813,13 @@ void goto_set_num_threads(int num_threads) { | |||
| UNLOCK_COMMAND(&server_lock); | |||
| } | |||
| #ifndef NO_AFFINITY | |||
| if(blas_cpu_number == 1 && num_threads > 1){ | |||
| //Restore the thread 0 affinity. | |||
| gotoblas_set_affinity(0); | |||
| } | |||
| #endif | |||
| blas_cpu_number = num_threads; | |||
| #if defined(ARCH_MIPS64) | |||
| @@ -185,7 +185,7 @@ int get_num_procs(void) { | |||
| #endif | |||
| #if defined(OS_FREEBSD) || defined(OS_DARWIN) | |||
| #if defined(OS_FREEBSD) | |||
| int get_num_procs(void) { | |||
| @@ -206,6 +206,18 @@ int get_num_procs(void) { | |||
| #endif | |||
| #if defined(OS_DARWIN) | |||
| /* | |||
|  * Darwin variant: report the CPU core count obtained from sysctl. | |||
|  * | |||
|  * The physical core count ("hw.physicalcpu") is preferred. If that query | |||
|  * fails or yields a non-positive value, fall back to "hw.ncpu", and | |||
|  * finally to 1, so that this function never returns 0. | |||
|  * | |||
|  * The result is cached in a function-local static after the first call. | |||
|  * | |||
|  * Returns: the detected core count (always >= 1). | |||
|  */ | |||
| int get_num_procs(void) { | |||
|   static int nums = 0; | |||
|   size_t len; | |||
|   if (nums == 0){ | |||
|     int count = 0; | |||
|     len = sizeof(count); | |||
|     /* sysctlbyname() returns 0 on success; anything else means the query failed. */ | |||
|     if (sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0) != 0 || count < 1) { | |||
|       count = 0; | |||
|       len = sizeof(count); | |||
|       if (sysctlbyname("hw.ncpu", &count, &len, NULL, 0) != 0 || count < 1) { | |||
|         /* Last resort: assume a single core rather than reporting zero. */ | |||
|         count = 1; | |||
|       } | |||
|     } | |||
|     nums = count; | |||
|   } | |||
|   return nums; | |||
| } | |||
| #endif | |||
| /* | |||
| OpenBLAS uses the numbers of CPU cores in multithreading. | |||
| It can be set by openblas_set_num_threads(int num_threads); | |||
| @@ -119,7 +119,8 @@ so : ../$(LIBSONAME) | |||
| endif | |||
| ifeq ($(OSNAME), FreeBSD) | |||
| #http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or | |||
| ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD)) | |||
| so : ../$(LIBSONAME) | |||
| @@ -2669,7 +2669,8 @@ | |||
| if ($ARGV[5] == 1) { | |||
| #NO_LAPACK=1 | |||
| @underscore_objs = (@blasobjs, @misc_underscore_objs); | |||
| } elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1") { | |||
| } elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" || | |||
| -d "../lapack-3.4.2") { | |||
| @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs); | |||
| } else { | |||
| @underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs); | |||
| @@ -47,7 +47,7 @@ | |||
| #ifndef WINDOWS_ABI | |||
| #define STACKSIZE 64 | |||
| #define STACKSIZE 128 | |||
| #define OLD_M %rdi | |||
| #define OLD_N %rsi | |||
| @@ -57,7 +57,10 @@ | |||
| #define STACK_Y 16 + STACKSIZE(%rsp) | |||
| #define STACK_INCY 24 + STACKSIZE(%rsp) | |||
| #define STACK_BUFFER 32 + STACKSIZE(%rsp) | |||
| #define MMM 56(%rsp) | |||
| #define NN 64(%rsp) | |||
| #define AA 72(%rsp) | |||
| #define LDAX 80(%rsp) | |||
| #else | |||
| #define STACKSIZE 256 | |||
| @@ -132,12 +135,44 @@ | |||
| movq OLD_LDA, LDA | |||
| movq OLD_X, X | |||
| #else | |||
| movq OLD_M, M | |||
| movq OLD_N, N | |||
| movq OLD_A, A | |||
| movq OLD_LDA, LDA | |||
| movq OLD_M, MMM | |||
| movq OLD_N, NN | |||
| movq OLD_A, AA | |||
| movq OLD_LDA, LDAX | |||
| #endif | |||
| #ifdef HAVE_SSE3 | |||
| #ifndef WINDOWS_ABI | |||
| movddup %xmm0, ALPHA | |||
| #else | |||
| movddup %xmm3, ALPHA | |||
| #endif | |||
| #else | |||
| #ifndef WINDOWS_ABI | |||
| movapd %xmm0, ALPHA | |||
| #else | |||
| movapd %xmm3, ALPHA | |||
| #endif | |||
| unpcklpd ALPHA, ALPHA | |||
| #endif | |||
| .L0x: | |||
| xorq M,M | |||
| addq $1,M | |||
| salq $22,M | |||
| subq M,MMM | |||
| jge .L00 | |||
| movq MMM,%rax | |||
| addq M,%rax | |||
| jle .L999x | |||
| movq %rax,M | |||
| .L00: | |||
| movq LDAX,LDA | |||
| movq NN,N | |||
| movq AA,A | |||
| movq STACK_INCX, INCX | |||
| movq STACK_Y, Y | |||
| movq STACK_INCY, INCY | |||
| @@ -153,21 +188,6 @@ | |||
| subq $-16 * SIZE, A | |||
| #ifdef HAVE_SSE3 | |||
| #ifndef WINDOWS_ABI | |||
| movddup %xmm0, ALPHA | |||
| #else | |||
| movddup %xmm3, ALPHA | |||
| #endif | |||
| #else | |||
| #ifndef WINDOWS_ABI | |||
| movapd %xmm0, ALPHA | |||
| #else | |||
| movapd %xmm3, ALPHA | |||
| #endif | |||
| unpcklpd ALPHA, ALPHA | |||
| #endif | |||
| testq M, M | |||
| jle .L999 | |||
| testq N, N | |||
| @@ -854,7 +874,6 @@ | |||
| .L21: | |||
| #endif | |||
| subq $4, N | |||
| leaq 16 * SIZE(BUFFER), X1 | |||
| @@ -2461,6 +2480,12 @@ | |||
| ALIGN_4 | |||
| .L999: | |||
| leaq (, M, SIZE), %rax | |||
| addq %rax,AA | |||
| jmp .L0x; | |||
| ALIGN_4 | |||
| .L999x: | |||
| movq 0(%rsp), %rbx | |||
| movq 8(%rsp), %rbp | |||
| movq 16(%rsp), %r12 | |||
| @@ -1664,26 +1664,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define XGEMM_DEFAULT_UNROLL_M 1 | |||
| #endif | |||
| #define SGEMM_P sgemm_p | |||
| #define DGEMM_P dgemm_p | |||
| #define QGEMM_P qgemm_p | |||
| #define CGEMM_P cgemm_p | |||
| #define ZGEMM_P zgemm_p | |||
| #define XGEMM_P xgemm_p | |||
| #define SGEMM_R sgemm_r | |||
| #define DGEMM_R dgemm_r | |||
| #define QGEMM_R qgemm_r | |||
| #define CGEMM_R cgemm_r | |||
| #define ZGEMM_R zgemm_r | |||
| #define XGEMM_R xgemm_r | |||
| #define SGEMM_Q 128 | |||
| #define DGEMM_Q 128 | |||
| #define QGEMM_Q 128 | |||
| #define CGEMM_Q 128 | |||
| #define ZGEMM_Q 128 | |||
| #define XGEMM_Q 128 | |||
| #define SGEMM_DEFAULT_P sgemm_p | |||
| #define DGEMM_DEFAULT_P dgemm_p | |||
| #define QGEMM_DEFAULT_P qgemm_p | |||
| #define CGEMM_DEFAULT_P cgemm_p | |||
| #define ZGEMM_DEFAULT_P zgemm_p | |||
| #define XGEMM_DEFAULT_P xgemm_p | |||
| #define SGEMM_DEFAULT_R sgemm_r | |||
| #define DGEMM_DEFAULT_R dgemm_r | |||
| #define QGEMM_DEFAULT_R qgemm_r | |||
| #define CGEMM_DEFAULT_R cgemm_r | |||
| #define ZGEMM_DEFAULT_R zgemm_r | |||
| #define XGEMM_DEFAULT_R xgemm_r | |||
| #define SGEMM_DEFAULT_Q 128 | |||
| #define DGEMM_DEFAULT_Q 128 | |||
| #define QGEMM_DEFAULT_Q 128 | |||
| #define CGEMM_DEFAULT_Q 128 | |||
| #define ZGEMM_DEFAULT_Q 128 | |||
| #define XGEMM_DEFAULT_Q 128 | |||
| #define SYMV_P 16 | |||
| @@ -899,19 +899,64 @@ diff -ruN lapack-3.4.2.old/TESTING/LIN/Makefile lapack-3.4.2/TESTING/LIN/Makefil | |||
| ../xlintsts: xlintsts | |||
| mv xlintsts $@ | |||
| diff -ruN lapack-3.4.2.old/lapacke/src/Makefile lapack-3.4.2/lapacke/src/Makefile | |||
| --- lapack-3.4.2.old/lapacke/src/Makefile 2012-04-02 22:16:32 +0200 | |||
| +++ lapack-3.4.2/lapacke/src/Makefile 2012-04-22 21:38:38 +0200 | |||
| @@ -2041,19 +2041,21 @@ | |||
| --- lapack-3.4.2.old/lapacke/src/Makefile 2012-09-21 04:21:29 +0200 | |||
| +++ lapack-3.4.2/lapacke/src/Makefile 2012-10-15 22:04:56 +0200 | |||
| @@ -34,7 +34,7 @@ | |||
| # | |||
| include ../../make.inc | |||
| -SRC_OBJ = \ | |||
| +CSRC_OBJ = \ | |||
| lapacke_cbbcsd.o \ | |||
| lapacke_cbbcsd_work.o \ | |||
| lapacke_cbdsqr.o \ | |||
| @@ -526,7 +526,9 @@ | |||
| lapacke_cupgtr.o \ | |||
| lapacke_cupgtr_work.o \ | |||
| lapacke_cupmtr.o \ | |||
| -lapacke_cupmtr_work.o \ | |||
| +lapacke_cupmtr_work.o | |||
| + | |||
| +DSRC_OBJ = \ | |||
| lapacke_dbbcsd.o \ | |||
| lapacke_dbbcsd_work.o \ | |||
| lapacke_dbdsdc.o \ | |||
| @@ -1012,7 +1014,9 @@ | |||
| lapacke_dtrttp.o \ | |||
| lapacke_dtrttp_work.o \ | |||
| lapacke_dtzrzf.o \ | |||
| -lapacke_dtzrzf_work.o \ | |||
| +lapacke_dtzrzf_work.o | |||
| + | |||
| +SSRC_OBJ = \ | |||
| lapacke_sbbcsd.o \ | |||
| lapacke_sbbcsd_work.o \ | |||
| lapacke_sbdsdc.o \ | |||
| @@ -1492,7 +1496,9 @@ | |||
| lapacke_strttp.o \ | |||
| lapacke_strttp_work.o \ | |||
| lapacke_stzrzf.o \ | |||
| -lapacke_stzrzf_work.o \ | |||
| +lapacke_stzrzf_work.o | |||
| + | |||
| +ZSRC_OBJ = \ | |||
| lapacke_zbbcsd.o \ | |||
| lapacke_zbbcsd_work.o \ | |||
| lapacke_zbdsqr.o \ | |||
| @@ -2041,19 +2047,29 @@ | |||
| lapacke_zlagsy.o \ | |||
| lapacke_zlagsy_work.o | |||
| -ALLOBJ = $(SRC_OBJ) $(MATGEN_OBJ) | |||
| +OBJ_FILES := $(SRC_OBJ) | |||
| +COBJ_FILES := $(CSRC_OBJ) | |||
| +SOBJ_FILES := $(SSRC_OBJ) | |||
| +DOBJ_FILES := $(DSRC_OBJ) | |||
| +ZOBJ_FILES := $(ZSRC_OBJ) | |||
| -ifdef USEXBLAS | |||
| -ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC) | |||
| +ifdef LAPACKE_EXTENDED | |||
| +OBJ_FILES += $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC) | |||
| +OBJ_FILES += $(SRCX_OBJ) | |||
| endif | |||
| - | |||
| @@ -924,9 +969,32 @@ diff -ruN lapack-3.4.2.old/lapacke/src/Makefile lapack-3.4.2/lapacke/src/Makefil | |||
| -../../$(LAPACKELIB): $(ALLOBJ) $(ALLXOBJ) | |||
| - $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ALLOBJ) $(ALLXOBJ) | |||
| +../../$(LAPACKELIB): $(OBJ_FILES) | |||
| +# http://hackage.haskell.org/trac/gtk2hs/ticket/1146 | |||
| + echo $(OBJ_FILES) | xargs -n 100 $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) | |||
| +../../$(LAPACKELIB): $(COBJ_FILES) $(DOBJ_FILES) $(SOBJ_FILES) $(ZOBJ_FILES) $(OBJ_FILES) | |||
| + $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(COBJ_FILES) | |||
| + $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(DOBJ_FILES) | |||
| + $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(SOBJ_FILES) | |||
| + $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ZOBJ_FILES) | |||
| +ifneq ($(strip $(OBJ_FILES)),) | |||
| + $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(OBJ_FILES) | |||
| +endif | |||
| $(RANLIB) ../../$(LAPACKELIB) | |||
| .c.o: | |||
| diff -ruN lapack-3.4.2.old/lapacke/example/Makefile lapack-3.4.2/lapacke/example/Makefile | |||
| --- lapack-3.4.2.old/lapacke/example/Makefile 2012-03-23 06:55:22.000000000 +0800 | |||
| +++ lapack-3.4.2/lapacke/example/Makefile 2012-11-13 00:32:24.125449952 +0800 | |||
| @@ -4,12 +4,12 @@ | |||
| xexample_DGESV_rowmajor: example_DGESV_rowmajor.o ../../$(LAPACKLIB) ../../$(LAPACKELIB) | |||
| $(LOADER) $(LOADOPTS) example_DGESV_rowmajor.o \ | |||
| - ../../$(LAPACKELIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ | |||
| + ../../$(LAPACKELIB) $(CEXTRALIB) -o $@ | |||
| ./$@ | |||
| xexample_ZGESV_rowmajor: example_ZGESV_rowmajor.o ../../$(LAPACKLIB) ../../$(LAPACKELIB) | |||
| $(LOADER) $(LOADOPTS) example_ZGESV_rowmajor.o \ | |||
| - ../../$(LAPACKELIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ | |||
| + ../../$(LAPACKELIB) $(CEXTRALIB) -o $@ | |||
| ./$@ | |||
| .c.o: | |||