| @@ -1,8 +1,13 @@ | |||
| *.obj | |||
| *.lib | |||
| *.dll | |||
| *.def | |||
| *.o | |||
| lapack-3.1.1 | |||
| lapack-3.1.1.tgz | |||
| *.so | |||
| *.a | |||
| .svn | |||
| *~ | |||
| config.h | |||
| Makefile.conf | |||
| @@ -1,13 +1,40 @@ | |||
| OpenBLAS ChangeLog | |||
| ==================================================================== | |||
| Version 0.1 alpha2(in development) | |||
| Version 0.1 alpha2 | |||
| 23-Jun-2011 | |||
| common: | |||
| * | |||
| * Fixed the undefined blasint bug in the <cblas.h> file, so that other software | |||
| can include this header successfully (Refs issue #13 on github) | |||
| * Fixed the SEGFAULT bug on 64 cores. On SMP server, the number | |||
| of CPUs or cores should be less than or equal to 64.(Refs issue #14 | |||
| on github) | |||
| * Support "void goto_set_num_threads(int num_threads)" and "void | |||
| openblas_set_num_threads(int num_threads)" when USE_OPENMP=1 | |||
| * Added extern "C" to support C++. Thanks to Tasio for the patch (Refs | |||
| issue #21 on github) | |||
| * Provided an error message when the arch is not supported.(Refs | |||
| issue #19 on github) | |||
| * Fixed issue #23. Fixed a bug of f_check script about generating link flags. | |||
| * Added openblas_set_num_threads for Fortran. | |||
| * Fixed #25 a wrong result of rotmg. | |||
| * Fixed a bug about detecting underscore prefix in c_check. | |||
| * Print the wall time (cycles) when FUNCTION_PROFILE is enabled. | |||
| * Fixed #35 a build bug with NO_LAPACK=1 & DYNAMIC_ARCH=1 | |||
| * Added install target. You can use "make install". (Refs #20) | |||
| x86/x86_64: | |||
| * | |||
| * Fixed #28 a wrong result of dsdot on x86_64. | |||
| * Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6. | |||
| * Fixed #33 ztrmm bug on Nehalem. | |||
| * Worked around #27, the low-performance axpy issue with small input sizes and multiple threads. | |||
| MIPS64: | |||
| * | |||
| * Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64. | |||
| * Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2) | |||
| * Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3) | |||
| ==================================================================== | |||
| Version 0.1 alpha1 | |||
| 20-Mar-2011 | |||
| @@ -15,6 +15,10 @@ ifdef SANITY_CHECK | |||
| BLASDIRS += reference | |||
| endif | |||
| ifndef PREFIX | |||
| PREFIX = /opt/OpenBLAS | |||
| endif | |||
| SUBDIRS = $(BLASDIRS) | |||
| ifneq ($(NO_LAPACK), 1) | |||
| SUBDIRS += lapack | |||
| @@ -22,8 +26,8 @@ endif | |||
| SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench | |||
| .PHONY : all libs netlib test ctest shared | |||
| .NOTPARALLEL : all libs prof lapack-test | |||
| .PHONY : all libs netlib test ctest shared install | |||
| .NOTPARALLEL : all libs prof lapack-test install | |||
| all :: libs netlib tests shared | |||
| @echo | |||
| @@ -70,7 +74,7 @@ ifeq ($(OSNAME), Darwin) | |||
| endif | |||
| ifeq ($(OSNAME), WINNT) | |||
| $(MAKE) -C exports dll | |||
| # -ln -fs $(LIBDLLNAME) libopenblas.dll | |||
| -ln -fs $(LIBDLLNAME) libopenblas.dll | |||
| endif | |||
| ifeq ($(OSNAME), CYGWIN_NT) | |||
| $(MAKE) -C exports dll | |||
| @@ -96,18 +100,26 @@ endif | |||
| endif | |||
| libs : | |||
| ifeq ($(CORE), UNKOWN) | |||
| $(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.) | |||
| endif | |||
| -ln -fs $(LIBNAME) libopenblas.$(LIBSUFFIX) | |||
| for d in $(SUBDIRS) ; \ | |||
| do if test -d $$d; then \ | |||
| $(MAKE) -C $$d $(@F) || exit 1 ; \ | |||
| fi; \ | |||
| done | |||
| #Save the config files for installation | |||
| cp Makefile.conf Makefile.conf_last | |||
| cp config.h config_last.h | |||
| ifdef DYNAMIC_ARCH | |||
| $(MAKE) -C kernel commonlibs || exit 1 | |||
| for d in $(DYNAMIC_CORE) ; \ | |||
| do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ | |||
| done | |||
| echo DYNAMIC_ARCH=1 >> Makefile.conf_last | |||
| endif | |||
| touch lib.grd | |||
| prof : prof_blas prof_lapack | |||
| @@ -227,19 +239,23 @@ lapack-test : | |||
| dummy : | |||
| install : | |||
| $(MAKE) -f Makefile.install install | |||
| clean :: | |||
| @for d in $(SUBDIRS_ALL) ; \ | |||
| do if test -d $$d; then \ | |||
| $(MAKE) -C $$d $(@F) || exit 1 ; \ | |||
| fi; \ | |||
| done | |||
| ifdef DYNAMIC_ARCH | |||
| #ifdef DYNAMIC_ARCH | |||
| @$(MAKE) -C kernel clean | |||
| endif | |||
| #endif | |||
| @rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf libopenblas.$(LIBSUFFIX) libopenblas_p.$(LIBSUFFIX) *.lnk myconfig.h | |||
| @rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib | |||
| @if test -d lapack-3.1.1; then \ | |||
| echo deleting lapack-3.1.1; \ | |||
| rm -rf lapack-3.1.1 ;\ | |||
| fi | |||
| @rm -f *.grd Makefile.conf_last config_last.h | |||
| @echo Done. | |||
| @@ -0,0 +1,65 @@ | |||
| TOPDIR = . | |||
| export GOTOBLAS_MAKEFILE = 1 | |||
| -include $(TOPDIR)/Makefile.conf_last | |||
| include ./Makefile.system | |||
| .PHONY : install | |||
| .NOTPARALLEL : install | |||
| lib.grd : | |||
| $(error OpenBLAS: Please run "make" firstly) | |||
| install : lib.grd | |||
| @-mkdir -p $(PREFIX) | |||
| @echo Generating openblas_config.h in $(PREFIX) | |||
| #for inc | |||
| @echo \#ifndef OPENBLAS_CONFIG_H > $(PREFIX)/openblas_config.h | |||
| @echo \#define OPENBLAS_CONFIG_H >> $(PREFIX)/openblas_config.h | |||
| @cat config_last.h >> $(PREFIX)/openblas_config.h | |||
| @echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(PREFIX)/openblas_config.h | |||
| @cat openblas_config_template.h >> $(PREFIX)/openblas_config.h | |||
| @echo \#endif >> $(PREFIX)/openblas_config.h | |||
| @echo Generating f77blas.h in $(PREFIX) | |||
| @echo \#ifndef OPENBLAS_F77BLAS_H > $(PREFIX)/f77blas.h | |||
| @echo \#define OPENBLAS_F77BLAS_H >> $(PREFIX)/f77blas.h | |||
| @echo \#include \"openblas_config.h\" >> $(PREFIX)/f77blas.h | |||
| @cat common_interface.h >> $(PREFIX)/f77blas.h | |||
| @echo \#endif >> $(PREFIX)/f77blas.h | |||
| @echo Generating cblas.h in $(PREFIX) | |||
| @sed 's/common/openblas_config/g' cblas.h > $(PREFIX)/cblas.h | |||
| #for install static library | |||
| @echo Copy the static library to $(PREFIX) | |||
| @cp $(LIBNAME) $(PREFIX) | |||
| @-ln -fs $(PREFIX)/$(LIBNAME) $(PREFIX)/libopenblas.$(LIBSUFFIX) | |||
| #for install shared library | |||
| @echo Copy the shared library to $(PREFIX) | |||
| ifeq ($(OSNAME), Linux) | |||
| -cp $(LIBSONAME) $(PREFIX) | |||
| -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so | |||
| endif | |||
| ifeq ($(OSNAME), FreeBSD) | |||
| -cp $(LIBSONAME) $(PREFIX) | |||
| -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so | |||
| endif | |||
| ifeq ($(OSNAME), NetBSD) | |||
| -cp $(LIBSONAME) $(PREFIX) | |||
| -ln -fs $(PREFIX)/$(LIBSONAME) $(PREFIX)/libopenblas.so | |||
| endif | |||
| ifeq ($(OSNAME), Darwin) | |||
| -cp $(LIBDYNNAME) $(PREFIX) | |||
| -ln -fs $(PREFIX)/$(LIBDYNNAME) $(PREFIX)/libopenblas.dylib | |||
| endif | |||
| ifeq ($(OSNAME), WINNT) | |||
| -cp $(LIBDLLNAME) $(PREFIX) | |||
| -ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll | |||
| endif | |||
| ifeq ($(OSNAME), CYGWIN_NT) | |||
| -cp $(LIBDLLNAME) $(PREFIX) | |||
| -ln -fs $(PREFIX)/$(LIBDLLNAME) $(PREFIX)/libopenblas.dll | |||
| endif | |||
| @echo Install OK! | |||
| @@ -91,6 +91,9 @@ VERSION = 0.1alpha2 | |||
| # SANITY_CHECK to compare the result with reference BLAS. | |||
| # UTEST_CHECK = 1 | |||
| # The installation directory. | |||
| # PREFIX = /opt/OpenBLAS | |||
| # Common Optimization Flag; -O2 is enough. | |||
| # DEBUG = 1 | |||
| @@ -30,6 +30,10 @@ ifdef TARGET | |||
| GETARCH_FLAGS += -DFORCE_$(TARGET) | |||
| endif | |||
| ifdef INTERFACE64 | |||
| GETARCH_FLAGS += -DUSE64BITINT | |||
| endif | |||
| # This operation is expensive, so execution should be once. | |||
| ifndef GOTOBLAS_MAKEFILE | |||
| export GOTOBLAS_MAKEFILE = 1 | |||
| @@ -185,7 +189,7 @@ ifeq ($(C_COMPILER), INTEL) | |||
| CCOMMON_OPT += -wd981 | |||
| endif | |||
| ifdef USE_OPENMP | |||
| ifeq ($(USE_OPENMP), 1) | |||
| ifeq ($(C_COMPILER), GCC) | |||
| CCOMMON_OPT += -fopenmp | |||
| endif | |||
| @@ -489,7 +493,8 @@ endif | |||
| ifdef BINARY64 | |||
| ifdef INTERFACE64 | |||
| CCOMMON_OPT += -DUSE64BITINT | |||
| CCOMMON_OPT += | |||
| #-DUSE64BITINT | |||
| endif | |||
| endif | |||
| @@ -510,6 +515,10 @@ ifeq ($(DYNAMIC_ARCH), 1) | |||
| CCOMMON_OPT += -DDYNAMIC_ARCH | |||
| endif | |||
| ifeq ($(NO_LAPACK), 1) | |||
| CCOMMON_OPT += -DNO_LAPACK | |||
| endif | |||
| ifdef SMP | |||
| CCOMMON_OPT += -DSMP_SERVER | |||
| @@ -8,7 +8,9 @@ Download from project homepage. http://xianyi.github.com/OpenBLAS/ | |||
| Or, | |||
| check out codes from git://github.com/xianyi/OpenBLAS.git | |||
| 1)Normal compile | |||
| Please read GotoBLAS_02QuickInstall.txt or type "make" | |||
| (a) type "make" to detect the CPU automatically. | |||
| or | |||
| (b) type "make TARGET=xxx" to set target CPU, e.g. "make TARGET=NEHALEM". The full target list is in file TargetList.txt. | |||
| 2)Cross compile | |||
| Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. Finally, set TARGET explicitly. | |||
| @@ -20,6 +22,11 @@ make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-g | |||
| 3)Debug version | |||
| make DEBUG=1 | |||
| 4)Install to the directory (Optional) | |||
| e.g. | |||
| make install PREFIX=your_installation_directory | |||
| The default directory is /opt/OpenBLAS | |||
| 3.Support CPU & OS | |||
| Please read GotoBLAS_01Readme.txt | |||
| @@ -39,13 +46,17 @@ export GOTO_NUM_THREADS=4 | |||
| or | |||
| export OMP_NUM_THREADS=4 | |||
| The priorities are OPENBLAS_NUM_THREAD > GOTO_NUM_THREADS > OMP_NUM_THREADS. | |||
| The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS. | |||
| If you compile this lib with USE_OPENMP=1, you should set only the OMP_NUM_THREADS environment variable. | |||
| 4.2 Set the number of threads by calling functions. For example, | |||
| void goto_set_num_threads(int num_threads); | |||
| or | |||
| void openblas_set_num_threads(int num_threads); | |||
| If you compile this lib with USE_OPENMP=1, you should use the above functions, too. | |||
| 5.Report Bugs | |||
| Please add an issue at https://github.com/xianyi/OpenBLAS/issues | |||
| @@ -56,4 +67,17 @@ Optimization on ICT Loongson 3A CPU | |||
| OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas | |||
| 8.ChangeLog | |||
| Please see Changelog.txt for the differences from the GotoBLAS2 1.13 BSD version. | |||
| Please see Changelog.txt for the differences from the GotoBLAS2 1.13 BSD version. | |||
| 9.Known Issues | |||
| * The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit | |||
| is 64. On 32 bits, it is 32. | |||
| * This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) could compile the codes successfully. | |||
| 10. Specification of Git Branches | |||
| We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/). | |||
| Now, there are 4 branches in github.com. | |||
| * The master branch. This is the main branch that reflects a production-ready state. | |||
| * The develop branch. This is the main branch that reflects the latest delivered development changes for the next release. | |||
| * The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature into the develop branch in the future. | |||
| * The gh-pages branch. This is for web pages. | |||
| @@ -0,0 +1,57 @@ | |||
| Force Target Examples: | |||
| make TARGET=NEHALEM | |||
| make TARGET=LOONGSON3A BINARY=64 | |||
| make TARGET=ISTANBUL | |||
| Supported List: | |||
| 1.X86/X86_64 | |||
| a)Intel CPU: | |||
| P2 | |||
| COPPERMINE | |||
| KATMAI | |||
| NORTHWOOD | |||
| PRESCOTT | |||
| BANIAS | |||
| YONAH | |||
| CORE2 | |||
| PENRYN | |||
| DUNNINGTON | |||
| NEHALEM | |||
| ATOM | |||
| b)AMD CPU: | |||
| ATHLON | |||
| OPTERON | |||
| OPTERON_SSE3 | |||
| BARCELONA | |||
| SHANGHAI | |||
| ISTANBUL | |||
| c)VIA CPU: | |||
| SSE_GENERIC | |||
| VIAC3 | |||
| NANO | |||
| 2.Power CPU: | |||
| POWER4 | |||
| POWER5 | |||
| POWER6 | |||
| PPCG4 | |||
| PPC970 | |||
| PPC970MP | |||
| PPC440 | |||
| PPC440FP2 | |||
| CELL | |||
| 3.MIPS64 CPU: | |||
| SICORTEX | |||
| LOONGSON3A | |||
| 4.IA64 CPU: | |||
| ITANIUM2 | |||
| 5.SPARC CPU: | |||
| SPARC | |||
| SPARCV7 | |||
| @@ -149,7 +149,7 @@ $binformat = bin64 if ($data =~ /BINARY_64/); | |||
| $data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`; | |||
| $data =~ /globl\ ([_\.]*)(.*)/; | |||
| $data =~ /globl\s([_\.]*)(.*)/; | |||
| $need_fu = $1; | |||
| @@ -1,6 +1,14 @@ | |||
| #ifndef CBLAS_H | |||
| #define CBLAS_H | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| /* Assume C declarations for C++ */ | |||
| #endif /* __cplusplus */ | |||
| #include <stddef.h> | |||
| #include "common.h" | |||
| #define CBLAS_INDEX size_t | |||
| enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; | |||
| @@ -270,4 +278,10 @@ void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANS | |||
| double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc); | |||
| void cblas_xerbla(blasint p, char *rout, char *form, ...); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif /* __cplusplus */ | |||
| #endif | |||
| @@ -39,6 +39,11 @@ | |||
| #ifndef COMMON_H | |||
| #define COMMON_H | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| /* Assume C declarations for C++ */ | |||
| #endif /* __cplusplus */ | |||
| #ifndef _GNU_SOURCE | |||
| #define _GNU_SOURCE | |||
| #endif | |||
| @@ -607,4 +612,9 @@ extern int gotoblas_profile; | |||
| #define PRINT_DEBUG_NAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME) | |||
| #endif | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif /* __cplusplus */ | |||
| #endif | |||
| @@ -60,4 +60,8 @@ float _Complex BLASFUNC_REF(cdotc) (blasint *, float *, blasint *, float *, | |||
| double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *); | |||
| double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *); | |||
| void BLASFUNC_REF(drotmg)(double *, double *, double *, double *, double *); | |||
| double BLASFUNC_REF(dsdot)(blasint *, float *, blasint *, float *, blasint*); | |||
| #endif | |||
| @@ -1302,24 +1302,25 @@ int get_coretype(void){ | |||
| case 13: | |||
| return CORE_DUNNINGTON; | |||
| } | |||
| break; | |||
| case 2: | |||
| switch (model) { | |||
| case 5: | |||
| //Intel Core (Clarkdale) / Core (Arrandale) | |||
| // Pentium (Clarkdale) / Pentium Mobile (Arrandale) | |||
| // Xeon (Clarkdale), 32nm | |||
| return CORE_NEHALEM; | |||
| case 12: | |||
| //Xeon Processor 5600 (Westmere-EP) | |||
| return CORE_NEHALEM; | |||
| } | |||
| break; | |||
| break; | |||
| case 2: | |||
| switch (model) { | |||
| case 5: | |||
| //Intel Core (Clarkdale) / Core (Arrandale) | |||
| // Pentium (Clarkdale) / Pentium Mobile (Arrandale) | |||
| // Xeon (Clarkdale), 32nm | |||
| return CORE_NEHALEM; | |||
| case 12: | |||
| //Xeon Processor 5600 (Westmere-EP) | |||
| return CORE_NEHALEM; | |||
| } | |||
| break; | |||
| } | |||
| break; | |||
| case 15: | |||
| if (model <= 0x2) return CORE_NORTHWOOD; | |||
| return CORE_PRESCOTT; | |||
| if (model <= 0x2) return CORE_NORTHWOOD; | |||
| else return CORE_PRESCOTT; | |||
| } | |||
| } | |||
| @@ -6,7 +6,7 @@ COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) | |||
| COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX) | |||
| ifdef SMP | |||
| COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) | |||
| COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) | |||
| ifndef NO_AFFINITY | |||
| COMMONOBJS += init.$(SUFFIX) | |||
| endif | |||
| @@ -100,6 +100,9 @@ memory.$(SUFFIX) : $(MEMORY) ../../common.h ../../param.h | |||
| blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../../param.h | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| @@ -38,7 +38,7 @@ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include <sys/mman.h> | |||
| //#include <sys/mman.h> | |||
| #include "common.h" | |||
| #ifndef USE_OPENMP | |||
| @@ -49,6 +49,26 @@ | |||
| int blas_server_avail = 0; | |||
| void goto_set_num_threads(int num_threads) { /* Sets blas_cpu_number from the (adjusted) request and forwards it to omp_set_num_threads. */ | |||
| if (num_threads < 1) num_threads = blas_num_threads; /* non-positive request: fall back to the current blas_num_threads */ | |||
| if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER; /* clamp the request to MAX_CPU_NUMBER */ | |||
| if (num_threads > blas_num_threads) { /* grow blas_num_threads when the request exceeds it; it is never lowered here */ | |||
| blas_num_threads = num_threads; | |||
| } | |||
| blas_cpu_number = num_threads; | |||
| omp_set_num_threads(blas_cpu_number); /* pass the same count to the OpenMP runtime */ | |||
| } | |||
| void openblas_set_num_threads(int num_threads) { /* OpenBLAS-named entry point: forwards the argument unchanged to goto_set_num_threads. */ | |||
| goto_set_num_threads(num_threads); | |||
| } | |||
| int blas_thread_init(void){ | |||
| blas_get_cpu_number(); | |||
| @@ -172,13 +172,20 @@ static inline int rcount(unsigned long number) { | |||
| return count; | |||
| } | |||
| /*** | |||
| Known issue: The number of CPUs/cores should be less | |||
| than or equal to 8*sizeof(unsigned long). On 64 bits, | |||
| the limit is 64. On 32 bits, it is 32. | |||
| ***/ | |||
| static inline unsigned long get_cpumap(int node) { | |||
| int infile; | |||
| unsigned long affinity; | |||
| char name[160]; | |||
| char cpumap[160]; | |||
| char *p, *dummy; | |||
| int i=0; | |||
| sprintf(name, CPUMAP_NAME, node); | |||
| infile = open(name, O_RDONLY); | |||
| @@ -187,13 +194,19 @@ static inline unsigned long get_cpumap(int node) { | |||
| if (infile != -1) { | |||
| read(infile, name, sizeof(name)); | |||
| read(infile, cpumap, sizeof(cpumap)); | |||
| p = cpumap; | |||
| while (*p != '\n' && i<160){ | |||
| if(*p != ',') { | |||
| name[i++]=*p; | |||
| } | |||
| p++; | |||
| } | |||
| p = name; | |||
| while ((*p == '0') || (*p == ',')) p++; | |||
| // while ((*p == '0') || (*p == ',')) p++; | |||
| affinity = strtol(p, &dummy, 16); | |||
| affinity = strtoul(p, &dummy, 16); | |||
| close(infile); | |||
| } | |||
| @@ -347,7 +360,13 @@ static void disable_hyperthread(void) { | |||
| unsigned long share; | |||
| int cpu; | |||
| common -> avail = (1UL << common -> num_procs) - 1; | |||
| if(common->num_procs > 64){ | |||
| fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs); | |||
| exit(1); | |||
| }else if(common->num_procs == 64){ | |||
| common -> avail = 0xFFFFFFFFFFFFFFFFUL; | |||
| }else | |||
| common -> avail = (1UL << common -> num_procs) - 1; | |||
| #ifdef DEBUG | |||
| fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail); | |||
| @@ -376,7 +395,13 @@ static void disable_affinity(void) { | |||
| fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]); | |||
| #endif | |||
| lprocmask = (1UL << common -> final_num_procs) - 1; | |||
| if(common->final_num_procs > 64){ | |||
| fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs); | |||
| exit(1); | |||
| }else if(common->final_num_procs == 64){ | |||
| lprocmask = 0xFFFFFFFFFFFFFFFFUL; | |||
| }else | |||
| lprocmask = (1UL << common -> final_num_procs) - 1; | |||
| #ifndef USE_OPENMP | |||
| lprocmask &= *(unsigned long *)&cpu_orig_mask[0]; | |||
| @@ -0,0 +1,45 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| #include "common.h" | |||
| #ifdef SMP_SERVER | |||
| #ifdef OS_LINUX | |||
| extern void openblas_set_num_threads(int num_threads) ; | |||
| void NAME(int* num_threads){ /* By-pointer wrapper: dereferences num_threads and forwards it to openblas_set_num_threads. NOTE(review): NAME is defined outside this view; presumably the Fortran-callable symbol -- confirm. */ | |||
| openblas_set_num_threads(*num_threads); | |||
| } | |||
| #endif | |||
| #endif | |||
| @@ -74,20 +74,21 @@ void gotoblas_profile_quit(void) { | |||
| if (cycles > 0) { | |||
| fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n"); | |||
| fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle\n"); | |||
| fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle Wall Time(Cycles)\n"); | |||
| for (i = 0; i < MAX_PROF_TABLE; i ++) { | |||
| if (function_profile_table[i].calls) { | |||
| #ifndef OS_WINDOWS | |||
| fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f\n", | |||
| fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f %Ld\n", | |||
| #else | |||
| fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f\n", | |||
| fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f %lld\n", | |||
| #endif | |||
| func_table[i], | |||
| function_profile_table[i].calls, | |||
| (double)function_profile_table[i].cycles / (double)cycles * 100., | |||
| (double)function_profile_table[i].fops / (double)function_profile_table[i].tcycles * 100., | |||
| (double)function_profile_table[i].area / (double)function_profile_table[i].cycles | |||
| (double)function_profile_table[i].area / (double)function_profile_table[i].cycles, | |||
| function_profile_table[i].cycles | |||
| ); | |||
| } | |||
| } | |||
| @@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME) | |||
| zip : dll | |||
| zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME) | |||
| dll : libgoto2.dll | |||
| dll : ../$(LIBDLLNAME) | |||
| #libgoto2.dll | |||
| dll2 : libgoto2_shared.dll | |||
| libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) | |||
| ../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) | |||
| $(RANLIB) ../$(LIBNAME) | |||
| ifeq ($(BINARY32), 1) | |||
| $(DLLWRAP) -o $(@F) --def libgoto2.def \ | |||
| $(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ | |||
| --entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) | |||
| -lib /machine:i386 /def:libgoto2.def | |||
| else | |||
| $(DLLWRAP) -o $(@F) --def libgoto2.def \ | |||
| $(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ | |||
| --entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) | |||
| -lib /machine:X64 /def:libgoto2.def | |||
| endif | |||
| @@ -84,7 +85,7 @@ libgoto_hpl.def : gensymbol | |||
| perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F) | |||
| $(LIBDYNNAME) : ../$(LIBNAME) osx.def | |||
| $(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o $(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | |||
| $(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | |||
| symbol.$(SUFFIX) : symbol.S | |||
| $(CC) $(CFLAGS) -c -o $(@F) $^ | |||
| @@ -274,6 +274,7 @@ if ($link ne "") { | |||
| && ($flags !~ /kernel32/) | |||
| && ($flags !~ /advapi32/) | |||
| && ($flags !~ /shell32/) | |||
| && ($flags !~ /^\-l$/) | |||
| ) { | |||
| $linker_l .= $flags . " "; | |||
| } | |||
| @@ -604,30 +604,41 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #ifndef POWER | |||
| #define POWER | |||
| #endif | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
| #if defined(__i386__) || (__x86_64__) | |||
| #include "cpuid_x86.c" | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
| #ifdef __ia64__ | |||
| #include "cpuid_ia64.c" | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
| #ifdef __alpha | |||
| #include "cpuid_alpha.c" | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
| #ifdef POWER | |||
| #include "cpuid_power.c" | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
| #ifdef sparc | |||
| #include "cpuid_sparc.c" | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
| #ifdef __mips__ | |||
| #include "cpuid_mips.c" | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
| #ifndef OPENBLAS_SUPPORTED | |||
| #error "This arch/CPU is not supported by OpenBLAS." | |||
| #endif | |||
| #else | |||
| @@ -30,6 +30,10 @@ int main(int argc, char **argv) { | |||
| printf("#define DLOCAL_BUFFER_SIZE\t%ld\n", (DGEMM_DEFAULT_Q * DGEMM_DEFAULT_UNROLL_N * 2 * 1 * sizeof(double))); | |||
| printf("#define CLOCAL_BUFFER_SIZE\t%ld\n", (CGEMM_DEFAULT_Q * CGEMM_DEFAULT_UNROLL_N * 4 * 2 * sizeof(float))); | |||
| printf("#define ZLOCAL_BUFFER_SIZE\t%ld\n", (ZGEMM_DEFAULT_Q * ZGEMM_DEFAULT_UNROLL_N * 2 * 2 * sizeof(double))); | |||
| #ifdef USE64BITINT | |||
| printf("#define USE64BITINT\n"); | |||
| #endif | |||
| } | |||
| return 0; | |||
| @@ -85,7 +85,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc | |||
| //In that case, the threads would be dependent. | |||
| if (incx == 0 || incy == 0) | |||
| nthreads = 1; | |||
| //Temporarily work around the low performance issue with small input size & multithreads. | |||
| if (n <= 10000) | |||
| nthreads = 1; | |||
| if (nthreads == 1) { | |||
| #endif | |||
| @@ -49,6 +49,7 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){ | |||
| BLASLONG n = *N; | |||
| BLASLONG incx = *INCX; | |||
| BLASLONG incy = *INCY; | |||
| double ret = 0.0; | |||
| PRINT_DEBUG_NAME; | |||
| @@ -61,19 +62,21 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){ | |||
| if (incx < 0) x -= (n - 1) * incx; | |||
| if (incy < 0) y -= (n - 1) * incy; | |||
| return DSDOT_K(n, x, incx, y, incy); | |||
| ret=DSDOT_K(n, x, incx, y, incy); | |||
| FUNCTION_PROFILE_END(1, n, n); | |||
| IDEBUG_END; | |||
| return 0; | |||
| return ret; | |||
| } | |||
| #else | |||
| double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){ | |||
| double ret = 0.0; | |||
| PRINT_DEBUG_CNAME; | |||
| @@ -86,13 +89,13 @@ double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){ | |||
| if (incx < 0) x -= (n - 1) * incx; | |||
| if (incy < 0) y -= (n - 1) * incy; | |||
| return DSDOT_K(n, x, incx, y, incy); | |||
| ret=DSDOT_K(n, x, incx, y, incy); | |||
| FUNCTION_PROFILE_END(1, n, n); | |||
| IDEBUG_END; | |||
| return 0; | |||
| return ret; | |||
| } | |||
| @@ -7,6 +7,12 @@ | |||
| #define GAMSQ 16777216.e0 | |||
| #define RGAMSQ 5.9604645e-8 | |||
| #ifdef DOUBLE | |||
| #define ABS(x) fabs(x) | |||
| #else | |||
| #define ABS(x) fabsf(x) | |||
| #endif | |||
| #ifndef CBLAS | |||
| void NAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT *DY1, FLOAT *dparam){ | |||
| @@ -47,7 +53,7 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ | |||
| dq2 = dp2 * dy1; | |||
| dq1 = dp1 * *dx1; | |||
| if (! (abs(dq1) > abs(dq2))) goto L40; | |||
| if (! (ABS(dq1) > ABS(dq2))) goto L40; | |||
| dh21 = -(dy1) / *dx1; | |||
| dh12 = dp2 / dp1; | |||
| @@ -140,7 +146,7 @@ L150: | |||
| goto L130; | |||
| L160: | |||
| if (! (abs(*dd2) <= RGAMSQ)) { | |||
| if (! (ABS(*dd2) <= RGAMSQ)) { | |||
| goto L190; | |||
| } | |||
| if (*dd2 == ZERO) { | |||
| @@ -157,7 +163,7 @@ L180: | |||
| goto L160; | |||
| L190: | |||
| if (! (abs(*dd2) >= GAMSQ)) { | |||
| if (! (ABS(*dd2) >= GAMSQ)) { | |||
| goto L220; | |||
| } | |||
| igo = 3; | |||
| @@ -53,6 +53,11 @@ SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX) | |||
| CCOMMON_OPT += -DTS=$(TSUFFIX) | |||
| endif | |||
| KERNEL_INTERFACE = ../common_level1.h ../common_level2.h ../common_level3.h | |||
| ifneq ($(NO_LAPACK), 1) | |||
| KERNEL_INTERFACE += ../common_lapack.h | |||
| endif | |||
| ifeq ($(ARCH), x86) | |||
| COMMONOBJS += cpuid.$(SUFFIX) | |||
| endif | |||
| @@ -88,9 +93,10 @@ setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h | |||
| setparam$(TSUFFIX).c : setparam-ref.c | |||
| sed 's/TS/$(TSUFFIX)/g' $< > $(@F) | |||
| kernel$(TSUFFIX).h : ../common_level1.h ../common_level2.h ../common_level3.h ../common_lapack.h | |||
| kernel$(TSUFFIX).h : $(KERNEL_INTERFACE) | |||
| sed 's/\ *(/$(TSUFFIX)(/g' $^ > $(@F) | |||
| cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S | |||
| $(CC) -c $(CFLAGS) $< -o $(@F) | |||
| @@ -112,10 +118,10 @@ lsame.$(PSUFFIX): $(KERNELDIR)/$(LSAME_KERNEL) | |||
| cpuid.$(PSUFFIX): $(KERNELDIR)/cpuid.S | |||
| $(CC) -c $(PFLAGS) $< -o $(@F) | |||
| ifdef DYNAMIC_ARCH | |||
| #ifdef DYNAMIC_ARCH | |||
| clean :: | |||
| @rm -f setparam_*.c kernel_*.h setparam.h kernel.h | |||
| endif | |||
| #endif | |||
| include $(TOPDIR)/Makefile.tail | |||
| @@ -668,7 +668,7 @@ $(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@ | |||
| $(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@ | |||
| $(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ | |||
| @@ -300,7 +300,11 @@ | |||
| .align 3 | |||
| .L999: | |||
| j $31 | |||
| ADD s1, s1, s2 | |||
| #ifdef DSDOT | |||
| cvt.d.s s1, s1 | |||
| #endif | |||
| j $31 | |||
| NOP | |||
| EPILOGUE | |||
| @@ -101,7 +101,11 @@ gotoblas_t TABLE_NAME = { | |||
| #endif | |||
| ssymm_outcopyTS, ssymm_oltcopyTS, | |||
| #ifndef NO_LAPACK | |||
| sneg_tcopyTS, slaswp_ncopyTS, | |||
| #else | |||
| NULL,NULL, | |||
| #endif | |||
| 0, 0, 0, | |||
| DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), | |||
| @@ -147,7 +151,11 @@ gotoblas_t TABLE_NAME = { | |||
| #endif | |||
| dsymm_outcopyTS, dsymm_oltcopyTS, | |||
| #ifndef NO_LAPACK | |||
| dneg_tcopyTS, dlaswp_ncopyTS, | |||
| #else | |||
| NULL, NULL, | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| @@ -195,7 +203,11 @@ gotoblas_t TABLE_NAME = { | |||
| #endif | |||
| qsymm_outcopyTS, qsymm_oltcopyTS, | |||
| #ifndef NO_LAPACK | |||
| qneg_tcopyTS, qlaswp_ncopyTS, | |||
| #else | |||
| NULL, NULL, | |||
| #endif | |||
| #endif | |||
| @@ -286,7 +298,11 @@ gotoblas_t TABLE_NAME = { | |||
| chemm3m_oucopyrTS, chemm3m_olcopyrTS, | |||
| chemm3m_oucopyiTS, chemm3m_olcopyiTS, | |||
| #ifndef NO_LAPACK | |||
| cneg_tcopyTS, claswp_ncopyTS, | |||
| #else | |||
| NULL, NULL, | |||
| #endif | |||
| 0, 0, 0, | |||
| ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N), | |||
| @@ -375,7 +391,11 @@ gotoblas_t TABLE_NAME = { | |||
| zhemm3m_oucopyrTS, zhemm3m_olcopyrTS, | |||
| zhemm3m_oucopyiTS, zhemm3m_olcopyiTS, | |||
| #ifndef NO_LAPACK | |||
| zneg_tcopyTS, zlaswp_ncopyTS, | |||
| #else | |||
| NULL, NULL, | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| @@ -466,7 +486,11 @@ gotoblas_t TABLE_NAME = { | |||
| xhemm3m_oucopyrTS, xhemm3m_olcopyrTS, | |||
| xhemm3m_oucopyiTS, xhemm3m_olcopyiTS, | |||
| #ifndef NO_LAPACK | |||
| xneg_tcopyTS, xlaswp_ncopyTS, | |||
| #else | |||
| NULL, NULL, | |||
| #endif | |||
| #endif | |||
| @@ -1541,5 +1541,8 @@ | |||
| popl %ebx | |||
| popl %esi | |||
| popl %edi | |||
| /*remove the hidden return value address from the stack.*/ | |||
| popl %ecx | |||
| xchgl %ecx, 0(%esp) | |||
| ret | |||
| EPILOGUE | |||
| @@ -1286,6 +1286,10 @@ | |||
| haddps %xmm0, %xmm0 | |||
| #endif | |||
| #ifdef DSDOT | |||
| cvtss2sd %xmm0, %xmm0 | |||
| #endif | |||
| RESTOREREGISTERS | |||
| ret | |||
| @@ -544,7 +544,7 @@ | |||
| jg .L11 | |||
| #if defined(TRMMKERNEL) && !defined(LEFT) | |||
| addq $1, KK | |||
| addq $4, KK | |||
| #endif | |||
| leaq (C, LDC, 4), C | |||
| @@ -594,7 +594,7 @@ | |||
| jg .L11 | |||
| #if defined(TRMMKERNEL) && !defined(LEFT) | |||
| addq $1, KK | |||
| addq $4, KK | |||
| #endif | |||
| leaq (C, LDC, 4), C | |||
| @@ -0,0 +1,21 @@ | |||
| /* This template is only used by the "make install" target. It supplies the BLASFUNC name-decoration macro and the BLASLONG / BLASULONG / blasint integer typedefs. */ | |||
| #ifdef NEEDBUNDERSCORE /* symbol names carry a trailing underscore */ | |||
| #define BLASFUNC(FUNC) FUNC##_ | |||
| #else /* symbol names are used undecorated */ | |||
| #define BLASFUNC(FUNC) FUNC | |||
| #endif | |||
| #if defined(OS_WINDOWS) && defined(__64BIT__) /* NOTE(review): presumably because long is only 32 bits wide on 64-bit Windows - confirm */ | |||
| typedef long long BLASLONG; | |||
| typedef unsigned long long BLASULONG; | |||
| #else /* every other platform: plain long / unsigned long */ | |||
| typedef long BLASLONG; | |||
| typedef unsigned long BLASULONG; | |||
| #endif | |||
| #ifdef USE64BITINT /* blasint follows BLASLONG when USE64BITINT is defined */ | |||
| typedef BLASLONG blasint; | |||
| #else /* otherwise blasint is a plain int */ | |||
| typedef int blasint; | |||
| #endif | |||
| @@ -128,6 +128,8 @@ CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS) | |||
| ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS) | |||
| XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) | |||
| ifneq ($(NO_LAPACK), 1) | |||
| SBLASOBJS += \ | |||
| sgetf2f.$(SUFFIX) sgetrff.$(SUFFIX) slauu2f.$(SUFFIX) slauumf.$(SUFFIX) \ | |||
| spotf2f.$(SUFFIX) spotrff.$(SUFFIX) strti2f.$(SUFFIX) strtrif.$(SUFFIX) \ | |||
| @@ -160,6 +162,7 @@ XBLASOBJS += | |||
| xpotf2f.$(SUFFIX) xpotrff.$(SUFFIX) xtrti2f.$(SUFFIX) xtrtrif.$(SUFFIX) \ | |||
| xlaswpf.$(SUFFIX) xgetrsf.$(SUFFIX) xgesvf.$(SUFFIX) xpotrif.$(SUFFIX) \ | |||
| endif | |||
| include $(TOPDIR)/Makefile.tail | |||
| @@ -5,12 +5,12 @@ include $(TOPDIR)/Makefile.system | |||
| TARGET=openblas_utest | |||
| CUNIT_LIB=/usr/local/lib/libcunit.a | |||
| OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o | |||
| OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o | |||
| all : run_test | |||
| $(TARGET): $(OBJS) | |||
| $(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB) | |||
| $(FC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB) | |||
| run_test: $(TARGET) | |||
| ./$(TARGET) | |||
| @@ -57,4 +57,8 @@ void test_caxpy_inc_0(void); | |||
| void test_zdotu_n_1(void); | |||
| void test_zdotu_offset_1(void); | |||
| void test_drotmg(void); | |||
| void test_dsdot_n_1(void); | |||
| #endif | |||
| @@ -54,7 +54,10 @@ CU_TestInfo test_level1[]={ | |||
| {"Testing zdotu with n == 1",test_zdotu_n_1}, | |||
| {"Testing zdotu with input x & y offset == 1",test_zdotu_offset_1}, | |||
| {"Testing drotmg",test_drotmg}, | |||
| {"Testing dsdot with n == 1",test_dsdot_n_1}, | |||
| CU_TEST_INFO_NULL, | |||
| }; | |||
| @@ -0,0 +1,50 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| #include "common_utest.h" | |||
| /* Unit test: dsdot on single-element vectors (n == 1, unit strides). */ | |||
| /* The OpenBLAS result must match the reference result to within CHECK_EPS. */ | |||
| void test_dsdot_n_1(void) | |||
| { | |||
| float x= 0.172555164; | |||
| float y= -0.0138700781; | |||
| /* NOTE(review): n/incx/incy are plain int; if the library is built with a wider blasint these should use that type - confirm against common_utest.h */ | |||
| int incx=1; | |||
| int incy=1; | |||
| int n=1; | |||
| /* dsdot returns double, so the accumulators are initialised with double literals. */ | |||
| double res1=0.0, res2=0.0; | |||
| /* res1: implementation under test; res2: reference implementation, same arguments. */ | |||
| res1=BLASFUNC(dsdot)(&n, &x, &incx, &y, &incy); | |||
| res2=BLASFUNC_REF(dsdot)(&n, &x, &incx, &y, &incy); | |||
| CU_ASSERT_DOUBLE_EQUAL(res1, res2, CHECK_EPS); | |||
| } | |||
| @@ -0,0 +1,60 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| #include "common_utest.h" | |||
| /* Unit test: run drotmg from OpenBLAS and from the reference BLAS on one fixed input set. */ | |||
| /* Every in/out scalar and all five param entries must agree to within CHECK_EPS. */ | |||
| void test_drotmg(void) | |||
| { | |||
| /* te_* values are handed to OpenBLAS, tr_* values to the reference implementation. */ | |||
| double te_d1, tr_d1; | |||
| double te_d2, tr_d2; | |||
| double te_x1, tr_x1; | |||
| double te_y1, tr_y1; | |||
| double te_param[5],tr_param[5]; | |||
| int i=0; | |||
| te_d1= tr_d1=0.21149573940783739; | |||
| te_d2= tr_d2=0.046892057172954082; | |||
| te_x1= tr_x1=-0.42272687517106533; | |||
| te_y1= tr_y1=0.42211309121921659; | |||
| /* drotmg is not required to write every param entry (which ones are set depends on the flag in param[0]). */ | |||
| /* Zero both arrays so the comparison loop below never reads indeterminate values. */ | |||
| for(i=0; i<5; i++){ | |||
| te_param[i]=0.0; | |||
| tr_param[i]=0.0; | |||
| } | |||
| /* OpenBLAS */ | |||
| BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param); | |||
| /* reference */ | |||
| BLASFUNC_REF(drotmg)(&tr_d1, &tr_d2, &tr_x1, &tr_y1, tr_param); | |||
| CU_ASSERT_DOUBLE_EQUAL(te_d1, tr_d1, CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(te_d2, tr_d2, CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(te_x1, tr_x1, CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(te_y1, tr_y1, CHECK_EPS); | |||
| for(i=0; i<5; i++){ | |||
| CU_ASSERT_DOUBLE_EQUAL(te_param[i], tr_param[i], CHECK_EPS); | |||
| } | |||
| } | |||