| @@ -1,8 +1,13 @@ | |||
| *.obj | |||
| *.lib | |||
| *.dll | |||
| *.def | |||
| *.o | |||
| lapack-3.1.1 | |||
| lapack-3.1.1.tgz | |||
| *.so | |||
| *.a | |||
| .svn | |||
| *~ | |||
| config.h | |||
| Makefile.conf | |||
| @@ -1,6 +1,7 @@ | |||
| OpenBLAS ChangeLog | |||
| ==================================================================== | |||
| Version 0.1 alpha2(in development) | |||
| Version 0.1 alpha2 | |||
| 23-Jun-2011 | |||
| common: | |||
| * Fixed blasint undefined bug in <cblas.h> file. Other software | |||
| @@ -15,11 +16,25 @@ common: | |||
| * Provided an error message when the arch is not supported.(Refs | |||
| issue #19 on github) | |||
| * Fixed issue #23. Fixed a bug of f_check script about generating link flags. | |||
| * Added openblas_set_num_threads for Fortran. | |||
| * Fixed #25 a wrong result of rotmg. | |||
| * Fixed a bug about detecting underscore prefix in c_check. | |||
| * Print the wall time (cycles) with enabling FUNCTION_PROFILE | |||
| * Fixed #35 a build bug with NO_LAPACK=1 & DYNAMIC_ARCH=1 | |||
| * Added install target. You can use "make install". (Refs #20) | |||
| x86/x86_64: | |||
| * | |||
| * Fixed #28 a wrong result of dsdot on x86_64. | |||
| * Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6. | |||
| * Fixed #33 ztrmm bug on Nehalem. | |||
| * Worked around #27, the low-performance axpy issue with small input sizes and multiple threads. | |||
| MIPS64: | |||
| * | |||
| * Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64. | |||
| * Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2) | |||
| * Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3) | |||
| ==================================================================== | |||
| Version 0.1 alpha1 | |||
| 20-Mar-2011 | |||
| @@ -15,6 +15,10 @@ ifdef SANITY_CHECK | |||
| BLASDIRS += reference | |||
| endif | |||
| ifndef PREFIX | |||
| PREFIX = /opt/OpenBLAS | |||
| endif | |||
| SUBDIRS = $(BLASDIRS) | |||
| ifneq ($(NO_LAPACK), 1) | |||
| SUBDIRS += lapack | |||
| @@ -22,8 +26,8 @@ endif | |||
| SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench | |||
| .PHONY : all libs netlib test ctest shared | |||
| .NOTPARALLEL : all libs prof lapack-test | |||
| .PHONY : all libs netlib test ctest shared install | |||
| .NOTPARALLEL : all libs prof lapack-test install | |||
| all :: libs netlib tests shared | |||
| @echo | |||
| @@ -70,7 +74,7 @@ ifeq ($(OSNAME), Darwin) | |||
| endif | |||
| ifeq ($(OSNAME), WINNT) | |||
| $(MAKE) -C exports dll | |||
| # -ln -fs $(LIBDLLNAME) libopenblas.dll | |||
| -ln -fs $(LIBDLLNAME) libopenblas.dll | |||
| endif | |||
| ifeq ($(OSNAME), CYGWIN_NT) | |||
| $(MAKE) -C exports dll | |||
| @@ -105,12 +109,17 @@ endif | |||
| $(MAKE) -C $$d $(@F) || exit 1 ; \ | |||
| fi; \ | |||
| done | |||
| #Save the config files for installation | |||
| cp Makefile.conf Makefile.conf_last | |||
| cp config.h config_last.h | |||
| ifdef DYNAMIC_ARCH | |||
| $(MAKE) -C kernel commonlibs || exit 1 | |||
| for d in $(DYNAMIC_CORE) ; \ | |||
| do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ | |||
| done | |||
| echo DYNAMIC_ARCH=1 >> Makefile.conf_last | |||
| endif | |||
| touch lib.grd | |||
| prof : prof_blas prof_lapack | |||
| @@ -230,19 +239,23 @@ lapack-test : | |||
| dummy : | |||
| install : | |||
| $(MAKE) -f Makefile.install install | |||
| clean :: | |||
| @for d in $(SUBDIRS_ALL) ; \ | |||
| do if test -d $$d; then \ | |||
| $(MAKE) -C $$d $(@F) || exit 1 ; \ | |||
| fi; \ | |||
| done | |||
| ifdef DYNAMIC_ARCH | |||
| #ifdef DYNAMIC_ARCH | |||
| @$(MAKE) -C kernel clean | |||
| endif | |||
| #endif | |||
| @rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf libopenblas.$(LIBSUFFIX) libopenblas_p.$(LIBSUFFIX) *.lnk myconfig.h | |||
| @rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib | |||
| @if test -d lapack-3.1.1; then \ | |||
| echo deleting lapack-3.1.1; \ | |||
| rm -rf lapack-3.1.1 ;\ | |||
| fi | |||
| @rm -f *.grd Makefile.conf_last config_last.h | |||
| @echo Done. | |||
| @@ -0,0 +1,65 @@ | |||
| # Makefile.install -- copies the generated headers and the built libraries | |||
| # into $(PREFIX). It is driven by the "install" target of the top-level | |||
| # Makefile ($(MAKE) -f Makefile.install install). | |||
| TOPDIR = . | |||
| export GOTOBLAS_MAKEFILE = 1 | |||
| # Makefile.conf_last is the configuration snapshot saved by the main build; | |||
| # the leading "-" keeps parsing alive when it does not exist yet, so that the | |||
| # lib.grd rule below can report the real problem. | |||
| -include $(TOPDIR)/Makefile.conf_last | |||
| include ./Makefile.system | |||
| .PHONY : install | |||
| .NOTPARALLEL : install | |||
| # lib.grd is the stamp file touched at the end of the library build. This | |||
| # rule has no prerequisites, so its recipe only runs when the stamp is | |||
| # missing, i.e. when nothing has been built yet. | |||
| lib.grd : | |||
| $(error OpenBLAS: Please run "make" firstly) | |||
| install : lib.grd | |||
| @-mkdir -p $(PREFIX) | |||
| @echo Generating openblas_config.h in $(PREFIX) | |||
| #for inc | |||
| # openblas_config.h = include guard + saved config_last.h + VERSION macro | |||
| # + openblas_config_template.h | |||
| @echo \#ifndef OPENBLAS_CONFIG_H > $(PREFIX)/openblas_config.h | |||
| @echo \#define OPENBLAS_CONFIG_H >> $(PREFIX)/openblas_config.h | |||
| @cat config_last.h >> $(PREFIX)/openblas_config.h | |||
| @echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(PREFIX)/openblas_config.h | |||
| @cat openblas_config_template.h >> $(PREFIX)/openblas_config.h | |||
| @echo \#endif >> $(PREFIX)/openblas_config.h | |||
| @echo Generating f77blas.h in $(PREFIX) | |||
| # f77blas.h = include guard + #include "openblas_config.h" + common_interface.h | |||
| @echo \#ifndef OPENBLAS_F77BLAS_H > $(PREFIX)/f77blas.h | |||
| @echo \#define OPENBLAS_F77BLAS_H >> $(PREFIX)/f77blas.h | |||
| @echo \#include \"openblas_config.h\" >> $(PREFIX)/f77blas.h | |||
| @cat common_interface.h >> $(PREFIX)/f77blas.h | |||
| @echo \#endif >> $(PREFIX)/f77blas.h | |||
| @echo Generating cblas.h in $(PREFIX) | |||
| # every occurrence of "common" in cblas.h becomes "openblas_config" | |||
| @sed 's/common/openblas_config/g' cblas.h > $(PREFIX)/cblas.h | |||
| #for install static library | |||
| @echo Copy the static library to $(PREFIX) | |||
| @cp $(LIBNAME) $(PREFIX) | |||
| # The generic libopenblas.* links are created by bare file name from inside | |||
| # $(PREFIX). A link target is resolved relative to the directory holding the | |||
| # link, so spelling the target as $(PREFIX)/<file> left a dangling link | |||
| # whenever PREFIX was a relative path. | |||
| @-cd $(PREFIX) && ln -fs $(LIBNAME) libopenblas.$(LIBSUFFIX) | |||
| #for install shared library | |||
| # cp/ln below are prefixed with "-" so that a missing shared library does | |||
| # not abort the installation. | |||
| @echo Copy the shared library to $(PREFIX) | |||
| ifeq ($(OSNAME), Linux) | |||
| -cp $(LIBSONAME) $(PREFIX) | |||
| -cd $(PREFIX) && ln -fs $(LIBSONAME) libopenblas.so | |||
| endif | |||
| ifeq ($(OSNAME), FreeBSD) | |||
| -cp $(LIBSONAME) $(PREFIX) | |||
| -cd $(PREFIX) && ln -fs $(LIBSONAME) libopenblas.so | |||
| endif | |||
| ifeq ($(OSNAME), NetBSD) | |||
| -cp $(LIBSONAME) $(PREFIX) | |||
| -cd $(PREFIX) && ln -fs $(LIBSONAME) libopenblas.so | |||
| endif | |||
| ifeq ($(OSNAME), Darwin) | |||
| -cp $(LIBDYNNAME) $(PREFIX) | |||
| -cd $(PREFIX) && ln -fs $(LIBDYNNAME) libopenblas.dylib | |||
| endif | |||
| ifeq ($(OSNAME), WINNT) | |||
| -cp $(LIBDLLNAME) $(PREFIX) | |||
| -cd $(PREFIX) && ln -fs $(LIBDLLNAME) libopenblas.dll | |||
| endif | |||
| ifeq ($(OSNAME), CYGWIN_NT) | |||
| -cp $(LIBDLLNAME) $(PREFIX) | |||
| -cd $(PREFIX) && ln -fs $(LIBDLLNAME) libopenblas.dll | |||
| endif | |||
| @echo Install OK! | |||
| @@ -91,6 +91,9 @@ VERSION = 0.1alpha2 | |||
| # SANITY_CHECK to compare the result with reference BLAS. | |||
| # UTEST_CHECK = 1 | |||
| # The installation directory. | |||
| # PREFIX = /opt/OpenBLAS | |||
| # Common Optimization Flag; -O2 is enough. | |||
| # DEBUG = 1 | |||
| @@ -515,6 +515,10 @@ ifeq ($(DYNAMIC_ARCH), 1) | |||
| CCOMMON_OPT += -DDYNAMIC_ARCH | |||
| endif | |||
| ifeq ($(NO_LAPACK), 1) | |||
| CCOMMON_OPT += -DNO_LAPACK | |||
| endif | |||
| ifdef SMP | |||
| CCOMMON_OPT += -DSMP_SERVER | |||
| @@ -22,6 +22,11 @@ make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-g | |||
| 3)Debug version | |||
| make DEBUG=1 | |||
| 4)Install to the directory (Optional) | |||
| e.g. | |||
| make install PREFIX=your_installation_directory | |||
| The default directory is /opt/OpenBLAS | |||
| 3.Support CPU & OS | |||
| Please read GotoBLAS_01Readme.txt | |||
| @@ -67,6 +72,7 @@ Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD ve | |||
| 9.Known Issues | |||
| * The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit | |||
| is 64. On 32 bits, it is 32. | |||
| * This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) could compile the codes successfully. | |||
| 10. Specification of Git Branches | |||
| We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/). | |||
| @@ -74,4 +80,4 @@ Now, there are 4 branches in github.com. | |||
| * The master branch. This is the main branch that reflects a production-ready state. | |||
| * The develop branch. This is the main branch that reflects the latest delivered development changes for the next release. | |||
| * The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature to develop branch in future. | |||
| * The gh-pages branch. This is for web pages | |||
| * The gh-pages branch. This is for web pages | |||
| @@ -149,7 +149,7 @@ $binformat = bin64 if ($data =~ /BINARY_64/); | |||
| $data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`; | |||
| $data =~ /globl\ ([_\.]*)(.*)/; | |||
| $data =~ /globl\s([_\.]*)(.*)/; | |||
| $need_fu = $1; | |||
| @@ -220,6 +220,11 @@ REALNAME: ;\ | |||
| #define BUFFER_SIZE ( 8 << 20) | |||
| #if defined(LOONGSON3A) | |||
| #define PAGESIZE (16UL << 10) | |||
| #define FIXED_PAGESIZE (16UL << 10) | |||
| #endif | |||
| #ifndef PAGESIZE | |||
| #define PAGESIZE (64UL << 10) | |||
| #endif | |||
| @@ -60,4 +60,8 @@ float _Complex BLASFUNC_REF(cdotc) (blasint *, float *, blasint *, float *, | |||
| double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *); | |||
| double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *); | |||
| void BLASFUNC_REF(drotmg)(double *, double *, double *, double *, double *); | |||
| double BLASFUNC_REF(dsdot)(blasint *, float *, blasint *, float *, blasint*); | |||
| #endif | |||
| @@ -6,7 +6,7 @@ COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) | |||
| COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX) | |||
| ifdef SMP | |||
| COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) | |||
| COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) | |||
| ifndef NO_AFFINITY | |||
| COMMONOBJS += init.$(SUFFIX) | |||
| endif | |||
| @@ -100,6 +100,9 @@ memory.$(SUFFIX) : $(MEMORY) ../../common.h ../../param.h | |||
| blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../../param.h | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| @@ -38,7 +38,7 @@ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include <sys/mman.h> | |||
| //#include <sys/mman.h> | |||
| #include "common.h" | |||
| #ifndef USE_OPENMP | |||
| @@ -0,0 +1,45 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| #include "common.h" | |||
| /* | |||
|  * By-reference wrapper around openblas_set_num_threads(): the thread count | |||
|  * arrives as a pointer and is forwarded by value. The exported symbol name | |||
|  * comes from the NAME macro (presumably supplied by the build -- confirm). | |||
|  * Compiled only when both SMP_SERVER and OS_LINUX are defined. | |||
|  */ | |||
| #if defined(SMP_SERVER) && defined(OS_LINUX) | |||
|  | |||
| extern void openblas_set_num_threads(int num_threads); | |||
|  | |||
| void NAME(int *num_threads) | |||
| { | |||
|     const int requested = *num_threads; | |||
|  | |||
|     openblas_set_num_threads(requested); | |||
| } | |||
|  | |||
| #endif /* SMP_SERVER && OS_LINUX */ | |||
| @@ -74,20 +74,21 @@ void gotoblas_profile_quit(void) { | |||
| if (cycles > 0) { | |||
| fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n"); | |||
| fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle\n"); | |||
| fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle Wall Time(Cycles)\n"); | |||
| for (i = 0; i < MAX_PROF_TABLE; i ++) { | |||
| if (function_profile_table[i].calls) { | |||
| #ifndef OS_WINDOWS | |||
| fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f\n", | |||
| fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f %Ld\n", | |||
| #else | |||
| fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f\n", | |||
| fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f %lld\n", | |||
| #endif | |||
| func_table[i], | |||
| function_profile_table[i].calls, | |||
| (double)function_profile_table[i].cycles / (double)cycles * 100., | |||
| (double)function_profile_table[i].fops / (double)function_profile_table[i].tcycles * 100., | |||
| (double)function_profile_table[i].area / (double)function_profile_table[i].cycles | |||
| (double)function_profile_table[i].area / (double)function_profile_table[i].cycles, | |||
| function_profile_table[i].cycles | |||
| ); | |||
| } | |||
| } | |||
| @@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME) | |||
| zip : dll | |||
| zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME) | |||
| dll : libgoto2.dll | |||
| dll : ../$(LIBDLLNAME) | |||
| #libgoto2.dll | |||
| dll2 : libgoto2_shared.dll | |||
| libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) | |||
| ../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) | |||
| $(RANLIB) ../$(LIBNAME) | |||
| ifeq ($(BINARY32), 1) | |||
| $(DLLWRAP) -o $(@F) --def libgoto2.def \ | |||
| $(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ | |||
| --entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) | |||
| -lib /machine:i386 /def:libgoto2.def | |||
| else | |||
| $(DLLWRAP) -o $(@F) --def libgoto2.def \ | |||
| $(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ | |||
| --entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) | |||
| -lib /machine:X64 /def:libgoto2.def | |||
| endif | |||
| @@ -84,7 +85,7 @@ libgoto_hpl.def : gensymbol | |||
| perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F) | |||
| $(LIBDYNNAME) : ../$(LIBNAME) osx.def | |||
| $(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o $(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | |||
| $(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | |||
| symbol.$(SUFFIX) : symbol.S | |||
| $(CC) $(CFLAGS) -c -o $(@F) $^ | |||
| @@ -85,7 +85,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc | |||
| //In that case, the threads would be dependent. | |||
| if (incx == 0 || incy == 0) | |||
| nthreads = 1; | |||
| //Temporarily work around the low performance issue with small input size & multithreads. | |||
| if (n <= 10000) | |||
| nthreads = 1; | |||
| if (nthreads == 1) { | |||
| #endif | |||
| @@ -49,6 +49,7 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){ | |||
| BLASLONG n = *N; | |||
| BLASLONG incx = *INCX; | |||
| BLASLONG incy = *INCY; | |||
| double ret = 0.0; | |||
| PRINT_DEBUG_NAME; | |||
| @@ -61,19 +62,21 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){ | |||
| if (incx < 0) x -= (n - 1) * incx; | |||
| if (incy < 0) y -= (n - 1) * incy; | |||
| return DSDOT_K(n, x, incx, y, incy); | |||
| ret=DSDOT_K(n, x, incx, y, incy); | |||
| FUNCTION_PROFILE_END(1, n, n); | |||
| IDEBUG_END; | |||
| return 0; | |||
| return ret; | |||
| } | |||
| #else | |||
| double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){ | |||
| double ret = 0.0; | |||
| PRINT_DEBUG_CNAME; | |||
| @@ -86,13 +89,13 @@ double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){ | |||
| if (incx < 0) x -= (n - 1) * incx; | |||
| if (incy < 0) y -= (n - 1) * incy; | |||
| return DSDOT_K(n, x, incx, y, incy); | |||
| ret=DSDOT_K(n, x, incx, y, incy); | |||
| FUNCTION_PROFILE_END(1, n, n); | |||
| IDEBUG_END; | |||
| return 0; | |||
| return ret; | |||
| } | |||
| @@ -7,6 +7,12 @@ | |||
| #define GAMSQ 16777216.e0 | |||
| #define RGAMSQ 5.9604645e-8 | |||
| #ifdef DOUBLE | |||
| #define ABS(x) fabs(x) | |||
| #else | |||
| #define ABS(x) fabsf(x) | |||
| #endif | |||
| #ifndef CBLAS | |||
| void NAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT *DY1, FLOAT *dparam){ | |||
| @@ -47,7 +53,7 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ | |||
| dq2 = dp2 * dy1; | |||
| dq1 = dp1 * *dx1; | |||
| if (! (abs(dq1) > abs(dq2))) goto L40; | |||
| if (! (ABS(dq1) > ABS(dq2))) goto L40; | |||
| dh21 = -(dy1) / *dx1; | |||
| dh12 = dp2 / dp1; | |||
| @@ -140,7 +146,7 @@ L150: | |||
| goto L130; | |||
| L160: | |||
| if (! (abs(*dd2) <= RGAMSQ)) { | |||
| if (! (ABS(*dd2) <= RGAMSQ)) { | |||
| goto L190; | |||
| } | |||
| if (*dd2 == ZERO) { | |||
| @@ -157,7 +163,7 @@ L180: | |||
| goto L160; | |||
| L190: | |||
| if (! (abs(*dd2) >= GAMSQ)) { | |||
| if (! (ABS(*dd2) >= GAMSQ)) { | |||
| goto L220; | |||
| } | |||
| igo = 3; | |||
| @@ -53,6 +53,11 @@ SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX) | |||
| CCOMMON_OPT += -DTS=$(TSUFFIX) | |||
| endif | |||
| KERNEL_INTERFACE = ../common_level1.h ../common_level2.h ../common_level3.h | |||
| ifneq ($(NO_LAPACK), 1) | |||
| KERNEL_INTERFACE += ../common_lapack.h | |||
| endif | |||
| ifeq ($(ARCH), x86) | |||
| COMMONOBJS += cpuid.$(SUFFIX) | |||
| endif | |||
| @@ -88,9 +93,10 @@ setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h | |||
| setparam$(TSUFFIX).c : setparam-ref.c | |||
| sed 's/TS/$(TSUFFIX)/g' $< > $(@F) | |||
| kernel$(TSUFFIX).h : ../common_level1.h ../common_level2.h ../common_level3.h ../common_lapack.h | |||
| kernel$(TSUFFIX).h : $(KERNEL_INTERFACE) | |||
| sed 's/\ *(/$(TSUFFIX)(/g' $^ > $(@F) | |||
| cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S | |||
| $(CC) -c $(CFLAGS) $< -o $(@F) | |||
| @@ -112,10 +118,10 @@ lsame.$(PSUFFIX): $(KERNELDIR)/$(LSAME_KERNEL) | |||
| cpuid.$(PSUFFIX): $(KERNELDIR)/cpuid.S | |||
| $(CC) -c $(PFLAGS) $< -o $(@F) | |||
| ifdef DYNAMIC_ARCH | |||
| #ifdef DYNAMIC_ARCH | |||
| clean :: | |||
| @rm -f setparam_*.c kernel_*.h setparam.h kernel.h | |||
| endif | |||
| #endif | |||
| include $(TOPDIR)/Makefile.tail | |||
| @@ -668,7 +668,7 @@ $(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@ | |||
| $(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@ | |||
| $(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ | |||
| @@ -91,15 +91,37 @@ ifndef ZGEMM_BETA | |||
| ZGEMM_BETA = ../generic/zgemm_beta.c | |||
| endif | |||
| ifndef STRSMKERNEL_LN | |||
| STRSMKERNEL_LN = trsm_kernel_LN.S | |||
| endif | |||
| ifndef STRSMKERNEL_LT | |||
| STRSMKERNEL_LT = trsm_kernel_LT.S | |||
| endif | |||
| ifndef STRSMKERNEL_RN | |||
| STRSMKERNEL_RN = trsm_kernel_LT.S | |||
| endif | |||
| ifndef STRSMKERNEL_RT | |||
| STRSMKERNEL_RT = trsm_kernel_RT.S | |||
| endif | |||
| ifndef DTRSMKERNEL_LN | |||
| DTRSMKERNEL_LN = trsm_kernel_LN.S | |||
| endif | |||
| ifndef DTRSMKERNEL_LT | |||
| DTRSMKERNEL_LT = trsm_kernel_LT.S | |||
| endif | |||
| ifndef DTRSMKERNEL_RN | |||
| DTRSMKERNEL_RN = trsm_kernel_LT.S | |||
| endif | |||
| ifndef DTRSMKERNEL_RT | |||
| DTRSMKERNEL_RT = trsm_kernel_RT.S | |||
| endif | |||
| CTRSMKERNEL_LN = ztrsm_kernel_LT.S | |||
| CTRSMKERNEL_LT = ztrsm_kernel_LT.S | |||
| @@ -1,2 +1,24 @@ | |||
| SAXPYKERNEL=axpy_loongson3a.S | |||
| DAXPYKERNEL=daxpy_loongson3a_simd.S | |||
| SGEMMKERNEL = sgemm_kernel_loongson3a.S | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| DGEMMKERNEL = gemm_kernel_loongson3a.S | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| @@ -300,7 +300,11 @@ | |||
| .align 3 | |||
| .L999: | |||
| j $31 | |||
| ADD s1, s1, s2 | |||
| #ifdef DSDOT | |||
| cvt.d.s s1, s1 | |||
| #endif | |||
| j $31 | |||
| NOP | |||
| EPILOGUE | |||
| @@ -101,7 +101,11 @@ gotoblas_t TABLE_NAME = { | |||
| #endif | |||
| ssymm_outcopyTS, ssymm_oltcopyTS, | |||
| #ifndef NO_LAPACK | |||
| sneg_tcopyTS, slaswp_ncopyTS, | |||
| #else | |||
| NULL,NULL, | |||
| #endif | |||
| 0, 0, 0, | |||
| DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), | |||
| @@ -147,7 +151,11 @@ gotoblas_t TABLE_NAME = { | |||
| #endif | |||
| dsymm_outcopyTS, dsymm_oltcopyTS, | |||
| #ifndef NO_LAPACK | |||
| dneg_tcopyTS, dlaswp_ncopyTS, | |||
| #else | |||
| NULL, NULL, | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| @@ -195,7 +203,11 @@ gotoblas_t TABLE_NAME = { | |||
| #endif | |||
| qsymm_outcopyTS, qsymm_oltcopyTS, | |||
| #ifndef NO_LAPACK | |||
| qneg_tcopyTS, qlaswp_ncopyTS, | |||
| #else | |||
| NULL, NULL, | |||
| #endif | |||
| #endif | |||
| @@ -286,7 +298,11 @@ gotoblas_t TABLE_NAME = { | |||
| chemm3m_oucopyrTS, chemm3m_olcopyrTS, | |||
| chemm3m_oucopyiTS, chemm3m_olcopyiTS, | |||
| #ifndef NO_LAPACK | |||
| cneg_tcopyTS, claswp_ncopyTS, | |||
| #else | |||
| NULL, NULL, | |||
| #endif | |||
| 0, 0, 0, | |||
| ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N), | |||
| @@ -375,7 +391,11 @@ gotoblas_t TABLE_NAME = { | |||
| zhemm3m_oucopyrTS, zhemm3m_olcopyrTS, | |||
| zhemm3m_oucopyiTS, zhemm3m_olcopyiTS, | |||
| #ifndef NO_LAPACK | |||
| zneg_tcopyTS, zlaswp_ncopyTS, | |||
| #else | |||
| NULL, NULL, | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| @@ -466,7 +486,11 @@ gotoblas_t TABLE_NAME = { | |||
| xhemm3m_oucopyrTS, xhemm3m_olcopyrTS, | |||
| xhemm3m_oucopyiTS, xhemm3m_olcopyiTS, | |||
| #ifndef NO_LAPACK | |||
| xneg_tcopyTS, xlaswp_ncopyTS, | |||
| #else | |||
| NULL, NULL, | |||
| #endif | |||
| #endif | |||
| @@ -1541,5 +1541,8 @@ | |||
| popl %ebx | |||
| popl %esi | |||
| popl %edi | |||
| /*remove the hidden return value address from the stack.*/ | |||
| popl %ecx | |||
| xchgl %ecx, 0(%esp) | |||
| ret | |||
| EPILOGUE | |||
| @@ -1286,6 +1286,10 @@ | |||
| haddps %xmm0, %xmm0 | |||
| #endif | |||
| #ifdef DSDOT | |||
| cvtss2sd %xmm0, %xmm0 | |||
| #endif | |||
| RESTOREREGISTERS | |||
| ret | |||
| @@ -544,7 +544,7 @@ | |||
| jg .L11 | |||
| #if defined(TRMMKERNEL) && !defined(LEFT) | |||
| addq $1, KK | |||
| addq $4, KK | |||
| #endif | |||
| leaq (C, LDC, 4), C | |||
| @@ -594,7 +594,7 @@ | |||
| jg .L11 | |||
| #if defined(TRMMKERNEL) && !defined(LEFT) | |||
| addq $1, KK | |||
| addq $4, KK | |||
| #endif | |||
| leaq (C, LDC, 4), C | |||
| @@ -0,0 +1,21 @@ | |||
| /*This is only for "make install" target.*/ | |||
| /* Makefile.install appends this text verbatim to the generated | |||
|    openblas_config.h, after the saved config_last.h and the VERSION macro. */ | |||
|  | |||
| /* BLASFUNC(FUNC): name of a BLAS routine; a trailing underscore is appended | |||
|    when NEEDBUNDERSCORE is defined, otherwise the name is used unchanged. */ | |||
| #ifdef NEEDBUNDERSCORE | |||
| #define BLASFUNC(FUNC) FUNC##_ | |||
| #else | |||
| #define BLASFUNC(FUNC) FUNC | |||
| #endif | |||
| /* BLASLONG / BLASULONG: long long on 64-bit Windows (OS_WINDOWS together | |||
|    with __64BIT__), plain long everywhere else. */ | |||
| #if defined(OS_WINDOWS) && defined(__64BIT__) | |||
| typedef long long BLASLONG; | |||
| typedef unsigned long long BLASULONG; | |||
| #else | |||
| typedef long BLASLONG; | |||
| typedef unsigned long BLASULONG; | |||
| #endif | |||
| /* blasint: integer type of the BLAS interface; BLASLONG when USE64BITINT | |||
|    is defined, int otherwise. */ | |||
| #ifdef USE64BITINT | |||
| typedef BLASLONG blasint; | |||
| #else | |||
| typedef int blasint; | |||
| #endif | |||
| @@ -1480,27 +1480,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
| #define SGEMM_DEFAULT_UNROLL_M 2 | |||
| #define SGEMM_DEFAULT_UNROLL_N 8 | |||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||
| #define DGEMM_DEFAULT_UNROLL_N 8 | |||
| #define SGEMM_DEFAULT_UNROLL_M 4 | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||
| #define DGEMM_DEFAULT_UNROLL_M 4 | |||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||
| #define CGEMM_DEFAULT_UNROLL_M 1 | |||
| #define CGEMM_DEFAULT_UNROLL_N 4 | |||
| #define ZGEMM_DEFAULT_UNROLL_M 1 | |||
| #define ZGEMM_DEFAULT_UNROLL_N 4 | |||
| #define SGEMM_DEFAULT_P 108 | |||
| #define DGEMM_DEFAULT_P 112 | |||
| #define SGEMM_DEFAULT_P 32 | |||
| #define DGEMM_DEFAULT_P 32 | |||
| #define CGEMM_DEFAULT_P 108 | |||
| #define ZGEMM_DEFAULT_P 112 | |||
| #define SGEMM_DEFAULT_Q 288 | |||
| #define DGEMM_DEFAULT_Q 144 | |||
| #define SGEMM_DEFAULT_Q 116 | |||
| #define DGEMM_DEFAULT_Q 116 | |||
| #define CGEMM_DEFAULT_Q 144 | |||
| #define ZGEMM_DEFAULT_Q 72 | |||
| #define SGEMM_DEFAULT_R 2000 | |||
| #define DGEMM_DEFAULT_R 2000 | |||
| #define SGEMM_DEFAULT_R 1000 | |||
| #define DGEMM_DEFAULT_R 1000 | |||
| #define CGEMM_DEFAULT_R 2000 | |||
| #define ZGEMM_DEFAULT_R 2000 | |||
| @@ -5,12 +5,12 @@ include $(TOPDIR)/Makefile.system | |||
| TARGET=openblas_utest | |||
| CUNIT_LIB=/usr/local/lib/libcunit.a | |||
| OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o | |||
| OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o | |||
| all : run_test | |||
| $(TARGET): $(OBJS) | |||
| $(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB) | |||
| $(FC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB) | |||
| run_test: $(TARGET) | |||
| ./$(TARGET) | |||
| @@ -57,4 +57,8 @@ void test_caxpy_inc_0(void); | |||
| void test_zdotu_n_1(void); | |||
| void test_zdotu_offset_1(void); | |||
| void test_drotmg(void); | |||
| void test_dsdot_n_1(void); | |||
| #endif | |||
| @@ -54,7 +54,10 @@ CU_TestInfo test_level1[]={ | |||
| {"Testing zdotu with n == 1",test_zdotu_n_1}, | |||
| {"Testing zdotu with input x & y offset == 1",test_zdotu_offset_1}, | |||
| {"Testing drotmg",test_drotmg}, | |||
| {"Testing dsdot with n == 1",test_dsdot_n_1}, | |||
| CU_TEST_INFO_NULL, | |||
| }; | |||
| @@ -0,0 +1,50 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| #include "common_utest.h" | |||
| /* | |||
|  * dsdot on one-element vectors with unit strides: the result of the library | |||
|  * routine must match the reference routine within CHECK_EPS. | |||
|  */ | |||
| void test_dsdot_n_1() | |||
| { | |||
|     /* single-precision operands */ | |||
|     float xval = 0.172555164; | |||
|     float yval = -0.0138700781; | |||
|  | |||
|     /* vector length and strides, passed by address */ | |||
|     int len = 1; | |||
|     int stride_x = 1; | |||
|     int stride_y = 1; | |||
|  | |||
|     /* routine under test first, reference second */ | |||
|     double got = BLASFUNC(dsdot)(&len, &xval, &stride_x, &yval, &stride_y); | |||
|     double want = BLASFUNC_REF(dsdot)(&len, &xval, &stride_x, &yval, &stride_y); | |||
|  | |||
|     CU_ASSERT_DOUBLE_EQUAL(got, want, CHECK_EPS); | |||
| } | |||
| @@ -0,0 +1,60 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| #include "common_utest.h" | |||
| /* | |||
|  * drotmg regression test: runs the library routine and the reference | |||
|  * routine on identical inputs and requires d1, d2, x1, y1 and all five | |||
|  * param entries to agree within CHECK_EPS. | |||
|  */ | |||
| void test_drotmg() | |||
| { | |||
|     /* te_* is handed to the routine under test, tr_* to the reference */ | |||
|     double te_d1, tr_d1; | |||
|     double te_d2, tr_d2; | |||
|     double te_x1, tr_x1; | |||
|     double te_y1, tr_y1; | |||
|     /* | |||
|      * Both arrays start zeroed. drotmg stores only the entries selected by | |||
|      * the flag it returns in param[0], while the loop below compares all | |||
|      * five; without initialisation the untouched entries would be | |||
|      * indeterminate and the comparison meaningless. | |||
|      */ | |||
|     double te_param[5] = {0.0}, tr_param[5] = {0.0}; | |||
|     int i=0; | |||
|  | |||
|     te_d1= tr_d1=0.21149573940783739; | |||
|     te_d2= tr_d2=0.046892057172954082; | |||
|     te_x1= tr_x1=-0.42272687517106533; | |||
|     te_y1= tr_y1=0.42211309121921659; | |||
|  | |||
|     //OpenBLAS | |||
|     BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param); | |||
|     //reference | |||
|     BLASFUNC_REF(drotmg)(&tr_d1, &tr_d2, &tr_x1, &tr_y1, tr_param); | |||
|  | |||
|     CU_ASSERT_DOUBLE_EQUAL(te_d1, tr_d1, CHECK_EPS); | |||
|     CU_ASSERT_DOUBLE_EQUAL(te_d2, tr_d2, CHECK_EPS); | |||
|     CU_ASSERT_DOUBLE_EQUAL(te_x1, tr_x1, CHECK_EPS); | |||
|     CU_ASSERT_DOUBLE_EQUAL(te_y1, tr_y1, CHECK_EPS); | |||
|  | |||
|     for(i=0; i<5; i++){ | |||
|         CU_ASSERT_DOUBLE_EQUAL(te_param[i], tr_param[i], CHECK_EPS); | |||
|     } | |||
| } | |||