| @@ -1,7 +1,8 @@ | |||||
| OpenBLAS ChangeLog | OpenBLAS ChangeLog | ||||
| ==================================================================== | ==================================================================== | ||||
| Version 0.1 alpha2(in development) | |||||
| 0;136;0c | |||||
| Version 0.1 alpha2 | |||||
| 23-Jun-2011 | |||||
| common: | common: | ||||
| * Fixed the undefined blasint bug in the <cblas.h> file. Other software | * Fixed the undefined blasint bug in the <cblas.h> file. Other software | ||||
| can now include this header successfully (Refs issue #13 on github) | can now include this header successfully (Refs issue #13 on github) | ||||
| @@ -31,6 +32,8 @@ x86/x86_64: | |||||
| MIPS64: | MIPS64: | ||||
| * Fixed #28: a wrong result of dsdot on Loongson3A/MIPS64. | * Fixed #28: a wrong result of dsdot on Loongson3A/MIPS64. | ||||
| * Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2) | |||||
| * Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3) | |||||
| ==================================================================== | ==================================================================== | ||||
| Version 0.1 alpha1 | Version 0.1 alpha1 | ||||
| @@ -74,7 +74,7 @@ ifeq ($(OSNAME), Darwin) | |||||
| endif | endif | ||||
| ifeq ($(OSNAME), WINNT) | ifeq ($(OSNAME), WINNT) | ||||
| $(MAKE) -C exports dll | $(MAKE) -C exports dll | ||||
| # -ln -fs $(LIBDLLNAME) libopenblas.dll | |||||
| -ln -fs $(LIBDLLNAME) libopenblas.dll | |||||
| endif | endif | ||||
| ifeq ($(OSNAME), CYGWIN_NT) | ifeq ($(OSNAME), CYGWIN_NT) | ||||
| $(MAKE) -C exports dll | $(MAKE) -C exports dll | ||||
| @@ -72,6 +72,7 @@ Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD ve | |||||
| 9.Known Issues | 9.Known Issues | ||||
| * The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit | * The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit | ||||
| is 64. On 32 bits, it is 32. | is 64. On 32 bits, it is 32. | ||||
| * This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) can compile the code successfully. | |||||
| 10. Specification of Git Branches | 10. Specification of Git Branches | ||||
| We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/). | We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/). | ||||
| @@ -79,4 +80,4 @@ Now, there are 4 branches in github.com. | |||||
| * The master branch. This is a main branch that reflects a production-ready state. | * The master branch. This is a main branch that reflects a production-ready state. | ||||
| * The develop branch. This is a main branch that reflects a state with the latest delivered development changes for the next release. | * The develop branch. This is a main branch that reflects a state with the latest delivered development changes for the next release. | ||||
| * The loongson3a branch. This is a feature branch. We develop Loongson3A code on this branch. We will merge this feature into the develop branch in the future. | * The loongson3a branch. This is a feature branch. We develop Loongson3A code on this branch. We will merge this feature into the develop branch in the future. | ||||
| * The gh-pages branch. This is for web pages | |||||
| * The gh-pages branch. This is for web pages | |||||
| @@ -220,6 +220,11 @@ REALNAME: ;\ | |||||
| #define BUFFER_SIZE ( 8 << 20) | #define BUFFER_SIZE ( 8 << 20) | ||||
| #if defined(LOONGSON3A) | |||||
| #define PAGESIZE (16UL << 10) | |||||
| #define FIXED_PAGESIZE (16UL << 10) | |||||
| #endif | |||||
| #ifndef PAGESIZE | #ifndef PAGESIZE | ||||
| #define PAGESIZE (64UL << 10) | #define PAGESIZE (64UL << 10) | ||||
| #endif | #endif | ||||
| @@ -38,7 +38,7 @@ | |||||
| #include <stdio.h> | #include <stdio.h> | ||||
| #include <stdlib.h> | #include <stdlib.h> | ||||
| #include <sys/mman.h> | |||||
| //#include <sys/mman.h> | |||||
| #include "common.h" | #include "common.h" | ||||
| #ifndef USE_OPENMP | #ifndef USE_OPENMP | ||||
| @@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME) | |||||
| zip : dll | zip : dll | ||||
| zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME) | zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME) | ||||
| dll : libgoto2.dll | |||||
| dll : ../$(LIBDLLNAME) | |||||
| #libgoto2.dll | |||||
| dll2 : libgoto2_shared.dll | dll2 : libgoto2_shared.dll | ||||
| libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) | |||||
| ../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) | |||||
| $(RANLIB) ../$(LIBNAME) | $(RANLIB) ../$(LIBNAME) | ||||
| ifeq ($(BINARY32), 1) | ifeq ($(BINARY32), 1) | ||||
| $(DLLWRAP) -o $(@F) --def libgoto2.def \ | |||||
| $(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ | |||||
| --entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) | --entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) | ||||
| -lib /machine:i386 /def:libgoto2.def | -lib /machine:i386 /def:libgoto2.def | ||||
| else | else | ||||
| $(DLLWRAP) -o $(@F) --def libgoto2.def \ | |||||
| $(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ | |||||
| --entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) | --entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) | ||||
| -lib /machine:X64 /def:libgoto2.def | -lib /machine:X64 /def:libgoto2.def | ||||
| endif | endif | ||||
| @@ -91,15 +91,37 @@ ifndef ZGEMM_BETA | |||||
| ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
| endif | endif | ||||
| ifndef STRSMKERNEL_LN | |||||
| STRSMKERNEL_LN = trsm_kernel_LN.S | STRSMKERNEL_LN = trsm_kernel_LN.S | ||||
| endif | |||||
| ifndef STRSMKERNEL_LT | |||||
| STRSMKERNEL_LT = trsm_kernel_LT.S | STRSMKERNEL_LT = trsm_kernel_LT.S | ||||
| endif | |||||
| ifndef STRSMKERNEL_RN | |||||
| STRSMKERNEL_RN = trsm_kernel_LT.S | STRSMKERNEL_RN = trsm_kernel_LT.S | ||||
| endif | |||||
| ifndef STRSMKERNEL_RT | |||||
| STRSMKERNEL_RT = trsm_kernel_RT.S | STRSMKERNEL_RT = trsm_kernel_RT.S | ||||
| endif | |||||
| ifndef DTRSMKERNEL_LN | |||||
| DTRSMKERNEL_LN = trsm_kernel_LN.S | DTRSMKERNEL_LN = trsm_kernel_LN.S | ||||
| endif | |||||
| ifndef DTRSMKERNEL_LT | |||||
| DTRSMKERNEL_LT = trsm_kernel_LT.S | DTRSMKERNEL_LT = trsm_kernel_LT.S | ||||
| endif | |||||
| ifndef DTRSMKERNEL_RN | |||||
| DTRSMKERNEL_RN = trsm_kernel_LT.S | DTRSMKERNEL_RN = trsm_kernel_LT.S | ||||
| endif | |||||
| ifndef DTRSMKERNEL_RT | |||||
| DTRSMKERNEL_RT = trsm_kernel_RT.S | DTRSMKERNEL_RT = trsm_kernel_RT.S | ||||
| endif | |||||
| CTRSMKERNEL_LN = ztrsm_kernel_LT.S | CTRSMKERNEL_LN = ztrsm_kernel_LT.S | ||||
| CTRSMKERNEL_LT = ztrsm_kernel_LT.S | CTRSMKERNEL_LT = ztrsm_kernel_LT.S | ||||
| @@ -1,2 +1,24 @@ | |||||
| SAXPYKERNEL=axpy_loongson3a.S | SAXPYKERNEL=axpy_loongson3a.S | ||||
| DAXPYKERNEL=daxpy_loongson3a_simd.S | DAXPYKERNEL=daxpy_loongson3a_simd.S | ||||
| SGEMMKERNEL = sgemm_kernel_loongson3a.S | |||||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
| DGEMMKERNEL = gemm_kernel_loongson3a.S | |||||
| DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| @@ -1480,27 +1480,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define GEMM_DEFAULT_OFFSET_B 0 | #define GEMM_DEFAULT_OFFSET_B 0 | ||||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | #define GEMM_DEFAULT_ALIGN 0x03fffUL | ||||
| #define SGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define SGEMM_DEFAULT_UNROLL_N 8 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 8 | |||||
| #define SGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define CGEMM_DEFAULT_UNROLL_M 1 | #define CGEMM_DEFAULT_UNROLL_M 1 | ||||
| #define CGEMM_DEFAULT_UNROLL_N 4 | #define CGEMM_DEFAULT_UNROLL_N 4 | ||||
| #define ZGEMM_DEFAULT_UNROLL_M 1 | #define ZGEMM_DEFAULT_UNROLL_M 1 | ||||
| #define ZGEMM_DEFAULT_UNROLL_N 4 | #define ZGEMM_DEFAULT_UNROLL_N 4 | ||||
| #define SGEMM_DEFAULT_P 108 | |||||
| #define DGEMM_DEFAULT_P 112 | |||||
| #define SGEMM_DEFAULT_P 32 | |||||
| #define DGEMM_DEFAULT_P 32 | |||||
| #define CGEMM_DEFAULT_P 108 | #define CGEMM_DEFAULT_P 108 | ||||
| #define ZGEMM_DEFAULT_P 112 | #define ZGEMM_DEFAULT_P 112 | ||||
| #define SGEMM_DEFAULT_Q 288 | |||||
| #define DGEMM_DEFAULT_Q 144 | |||||
| #define SGEMM_DEFAULT_Q 116 | |||||
| #define DGEMM_DEFAULT_Q 116 | |||||
| #define CGEMM_DEFAULT_Q 144 | #define CGEMM_DEFAULT_Q 144 | ||||
| #define ZGEMM_DEFAULT_Q 72 | #define ZGEMM_DEFAULT_Q 72 | ||||
| #define SGEMM_DEFAULT_R 2000 | |||||
| #define DGEMM_DEFAULT_R 2000 | |||||
| #define SGEMM_DEFAULT_R 1000 | |||||
| #define DGEMM_DEFAULT_R 1000 | |||||
| #define CGEMM_DEFAULT_R 2000 | #define CGEMM_DEFAULT_R 2000 | ||||
| #define ZGEMM_DEFAULT_R 2000 | #define ZGEMM_DEFAULT_R 2000 | ||||