| @@ -0,0 +1,27 @@ | |||
| OpenBLAS ChangeLog | |||
| ==================================================================== | |||
| Version 0.1 (in development) | |||
| 26-Feb-2011 | |||
| common: | |||
| * Fixed random SEGFAULT when nodemask==NULL on Linux 2.6.34 and above. | |||
| Thanks to Mr. Ei-ji Nakama for providing this patch. (Refs issue #12 on github) | |||
| * Added DEBUG=1 rule in Makefile.rule to build debug version. | |||
| * Disabled compiling quad precision in the reference BLAS library (netlib BLAS). | |||
| * Added unit testcases in utest/ subdir. Used CUnit framework. | |||
| * Supported OPENBLAS_* & GOTO_* environment variables (Please see README) | |||
| * Imported GotoBLAS2 1.13 BSD version | |||
| x86/x86 64: | |||
| * Modified ?axpy functions to return same netlib BLAS results | |||
| when incx==0 or incy==0 (Refs issue #7 on github) | |||
| * Modified ?swap functions to return same netlib BLAS results | |||
| when incx==0 or incy==0 (Refs issue #6 on github) | |||
| * Modified ?rot functions to return same netlib BLAS results | |||
| when incx==0 or incy==0 (Refs issue #4 on github) | |||
| * Detect Intel Westmere to use Nehalem codes. | |||
| * Fixed a typo bug about compiling dynamic ARCH library. | |||
| MIPS64: | |||
| * Improve daxpy performance on ICT Loongson 3A. | |||
| * Supported ICT Loongson 3A CPU (Refs issue #1 on github) | |||
| ==================================================================== | |||
| @@ -70,7 +70,7 @@ VERSION = 0.1 | |||
| # time out to improve performance. This number should be from 4 to 30 | |||
| # which corresponds to (1 << n) cycles. For example, if you set to 26, | |||
| # thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz | |||
| # system). Also you can control this number by GOTO_THREAD_TIMEOUT | |||
| # system). Also you can control this number by THREAD_TIMEOUT | |||
| # CCOMMON_OPT += -DTHREAD_TIMEOUT=26 | |||
| # Using special device driver for mapping physically contiguous memory | |||
| @@ -89,7 +89,13 @@ VERSION = 0.1 | |||
| # UTEST_CHECK = 1 | |||
| # Common Optimization Flag; -O2 is enough. | |||
| # DEBUG = 1 | |||
| ifeq ($(DEBUG), 1) | |||
| COMMON_OPT += -g -DDEBUG | |||
| else | |||
| COMMON_OPT += -O2 | |||
| endif | |||
| # Profiling flags | |||
| COMMON_PROF = -pg | |||
| @@ -4,6 +4,8 @@ OpenBLAS Readme | |||
| OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. OpenBLAS is an open source project supported by Lab of Parallel Software and Computational Science, ISCAS.(http://www.rdcps.ac.cn) | |||
| 2.Installation | |||
| Download from project homepage. http://xianyi.github.com/OpenBLAS/ | |||
| Or, | |||
| check out codes from git://github.com/xianyi/OpenBLAS.git | |||
| 1)Normal compile | |||
| Please read GotoBLAS_02QuickInstall.txt or type "make" | |||
| @@ -15,23 +17,43 @@ examples: | |||
| On X86 box, compile this library for loongson3a CPU. | |||
| make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A | |||
| 3)Debug version | |||
| make DEBUG=1 | |||
| 3.Support CPU & OS | |||
| Please read GotoBLAS_01Readme.txt | |||
| Additional support CPU: | |||
| x86_64: | |||
| Intel Xeon 56xx (Westmere) //Used GotoBLAS2 Nehalem codes. | |||
| MIPS64: | |||
| ICT Loongson 3A //The initial version used GotoBLAS2 MIPS64 kernels. Thus, the performance is not good. | |||
| 4.Usages | |||
| Link with libopenblas.a or -lopenblas for shared library. | |||
| Set the number of threads. for example, | |||
| 4.1 Set the number of threads with environment variables. For example, | |||
| export OPENBLAS_NUM_THREADS=4 | |||
| or | |||
| export GOTO_NUM_THREADS=4 | |||
| or | |||
| export OMP_NUM_THREADS=4 | |||
| OPENBLAS_NUM_THREADS takes priority over OMP_NUM_THREADS. | |||
| The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS. | |||
| 4.2 Set the number of threads by calling functions. For example, | |||
| void goto_set_num_threads(int num_threads); | |||
| or | |||
| void openblas_set_num_threads(int num_threads); | |||
| 5.Report Bugs | |||
| Please open an issue at https://github.com/xianyi/OpenBLAS/issues | |||
| 6.To-Do List: | |||
| Support ICT Loongson 3A CPU | |||
| Optimization on ICT Loongson 3A CPU | |||
| 7.Contact | |||
| OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas | |||
| 8.ChangeLog | |||
| Please see Changelog.txt for the differences from the GotoBLAS2 1.13 BSD version. | |||
| @@ -68,8 +68,9 @@ extern long int syscall (long int __sysno, ...); | |||
| //Thin wrapper that issues the mbind system call through syscall(). | |||
| // addr, len - start and length of the memory range, passed through unchanged | |||
| // mode - memory policy mode, passed through unchanged | |||
| // nodemask - node mask; may be NULL, in which case an empty mask is used instead | |||
| // maxnode - passed through unchanged | |||
| // flags - passed through unchanged | |||
| //Returns whatever syscall() returns. | |||
| static inline int my_mbind(void *addr, unsigned long len, int mode, | |||
| unsigned long *nodemask, unsigned long maxnode, | |||
| unsigned flags) { | |||
| //Fixed random SEGFAULT when nodemask==NULL on Linux 2.6.34 and above: | |||
| //never hand the kernel a NULL mask, point it at an empty one instead. | |||
| //A mask supplied by the caller is still honoured. | |||
| unsigned long null_nodemask=0; | |||
| if (!nodemask) nodemask = &null_nodemask; | |||
| return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags); | |||
| } | |||
| static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) { | |||
| @@ -43,4 +43,21 @@ void BLASFUNC_REF(csrot) (blasint *, float *, blasint *, float *, blasint *, | |||
| void BLASFUNC_REF(zdrot) (blasint *, double *, blasint *, double *, blasint *, double *, double *); | |||
| void BLASFUNC_REF(xqrot) (blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *); | |||
| void BLASFUNC_REF(sswap) (blasint *, float *, blasint *, float *, blasint *); | |||
| void BLASFUNC_REF(dswap) (blasint *, double *, blasint *, double *, blasint *); | |||
| void BLASFUNC_REF(qswap) (blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||
| void BLASFUNC_REF(cswap) (blasint *, float *, blasint *, float *, blasint *); | |||
| void BLASFUNC_REF(zswap) (blasint *, double *, blasint *, double *, blasint *); | |||
| void BLASFUNC_REF(xswap) (blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||
| void BLASFUNC_REF(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *); | |||
| void BLASFUNC_REF(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *); | |||
| void BLASFUNC_REF(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *); | |||
| void BLASFUNC_REF(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *); | |||
| float _Complex BLASFUNC_REF(cdotu) (blasint *, float *, blasint *, float *, blasint *); | |||
| float _Complex BLASFUNC_REF(cdotc) (blasint *, float *, blasint *, float *, blasint *); | |||
| double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *); | |||
| double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *); | |||
| #endif | |||
| @@ -972,8 +972,15 @@ int get_cpuname(void){ | |||
| return CPUTYPE_ATOM; | |||
| case 13: | |||
| return CPUTYPE_DUNNINGTON; | |||
| break; | |||
| } | |||
| break; | |||
| case 2: | |||
| switch (model) { | |||
| case 12: | |||
| //Xeon Processor 5600 (Westmere-EP) | |||
| return CPUTYPE_NEHALEM; | |||
| } | |||
| break; | |||
| } | |||
| break; | |||
| case 0x7: | |||
| @@ -1289,8 +1296,16 @@ int get_coretype(void){ | |||
| return CORE_ATOM; | |||
| case 13: | |||
| return CORE_DUNNINGTON; | |||
| break; | |||
| } | |||
| break; | |||
| case 2: | |||
| switch (model) { | |||
| case 12: | |||
| //Xeon Processor 5600 (Westmere-EP) | |||
| return CORE_NEHALEM; | |||
| } | |||
| break; | |||
| } | |||
| case 15: | |||
| if (model <= 0x2) return CORE_NORTHWOOD; | |||
| @@ -297,7 +297,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| printf("GEMM: SA .. %p SB .. %p\n", sa, sb); | |||
| #endif | |||
| #ifdef DEBUG | |||
| #ifdef TIMING | |||
| innercost = 0; | |||
| outercost = 0; | |||
| kernelcost = 0; | |||
| @@ -278,7 +278,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| // fprintf(stderr, "A = %p B = %p C = %p\n\tlda = %ld ldb = %ld ldc = %ld\n", a, b, c, lda, ldb, ldc); | |||
| #endif | |||
| #ifdef DEBUG | |||
| #ifdef TIMING | |||
| innercost = 0; | |||
| outercost = 0; | |||
| kernelcost = 0; | |||
| @@ -525,7 +525,16 @@ int blas_thread_init(void){ | |||
| if (thread_timeout < 4) thread_timeout = 4; | |||
| if (thread_timeout > 30) thread_timeout = 30; | |||
| thread_timeout = (1 << thread_timeout); | |||
| } | |||
| }else{ | |||
| p = getenv("GOTO_THREAD_TIMEOUT"); | |||
| if (p) { | |||
| thread_timeout = atoi(p); | |||
| if (thread_timeout < 4) thread_timeout = 4; | |||
| if (thread_timeout > 30) thread_timeout = 30; | |||
| thread_timeout = (1 << thread_timeout); | |||
| } | |||
| } | |||
| for(i = 0; i < blas_num_threads - 1; i++){ | |||
| @@ -790,6 +799,11 @@ void goto_set_num_threads(int num_threads) { | |||
| } | |||
| //OpenBLAS-named entry point for setting the number of threads: | |||
| //forwards num_threads unchanged to goto_set_num_threads(). | |||
| void openblas_set_num_threads(int num_threads) { | |||
| goto_set_num_threads(num_threads); | |||
| } | |||
| /* Compatible function with pthread_create / join */ | |||
| int gotoblas_pthread(int numthreads, void *function, void *args, int stride) { | |||
| @@ -121,6 +121,11 @@ static gotoblas_t *get_coretype(void){ | |||
| if ((model == 10) || (model == 11) || (model == 14) || (model == 15)) return &gotoblas_NEHALEM; | |||
| if (model == 12) return &gotoblas_ATOM; | |||
| return NULL; | |||
| case 2: | |||
| //Intel Xeon Processor 5600 (Westmere-EP) | |||
| if (model == 12) return &gotoblas_NEHALEM; | |||
| return NULL; | |||
| } | |||
| case 0xf: | |||
| if (model <= 0x2) return &gotoblas_NORTHWOOD; | |||
| @@ -92,7 +92,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define SHARE_NAME "/sys/devices/system/cpu/cpu%d/cache/index%d/shared_cpu_map" | |||
| #define NODE_DIR "/sys/devices/system/node" | |||
| #undef DEBUG | |||
| //#undef DEBUG | |||
| /* Private variables */ | |||
| typedef struct { | |||
| @@ -581,6 +581,7 @@ void gotoblas_affinity_init(void) { | |||
| numprocs = 0; | |||
| #else | |||
| numprocs = readenv("OPENBLAS_NUM_THREADS"); | |||
| if (numprocs == 0) numprocs = readenv("GOTO_NUM_THREADS"); | |||
| #endif | |||
| if (numprocs == 0) numprocs = readenv("OMP_NUM_THREADS"); | |||
| @@ -666,7 +667,7 @@ void gotoblas_affinity_init(void) { | |||
| setup_mempolicy(); | |||
| if (readenv("OPENBLAS_MAIN_FREE")) { | |||
| if (readenv("OPENBLAS_MAIN_FREE") || readenv("GOTOBLAS_MAIN_FREE")) { | |||
| sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]); | |||
| } | |||
| @@ -68,9 +68,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| /* or implied, of The University of Texas at Austin. */ | |||
| /*********************************************************************/ | |||
| #undef DEBUG | |||
| //#undef DEBUG | |||
| #include "common.h" | |||
| #include <errno.h> | |||
| #ifdef OS_WINDOWS | |||
| #define ALLOC_WINDOWS | |||
| @@ -231,6 +232,13 @@ int blas_get_cpu_number(void){ | |||
| p = getenv("OPENBLAS_NUM_THREADS"); | |||
| if (p) blas_goto_num = atoi(p); | |||
| if (blas_goto_num < 0) blas_goto_num = 0; | |||
| if (blas_goto_num == 0) { | |||
| p = getenv("GOTO_NUM_THREADS"); | |||
| if (p) blas_goto_num = atoi(p); | |||
| if (blas_goto_num < 0) blas_goto_num = 0; | |||
| } | |||
| #endif | |||
| blas_omp_num = 0; | |||
| @@ -379,11 +387,23 @@ static void *alloc_mmap(void *address){ | |||
| MMAP_ACCESS, MMAP_POLICY, -1, 0); | |||
| if (map_address != (void *)-1) { | |||
| #ifdef OS_LINUX | |||
| my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); | |||
| #ifdef DEBUG | |||
| //Debug builds check the result of my_mbind() and report a failure instead of ignoring it. | |||
| int ret; | |||
| ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); | |||
| if(ret==-1){ | |||
| //Save errno first: the perror()/printf() calls below may overwrite it. | |||
| int errsv=errno; | |||
| perror("alloc_mmap:"); | |||
| //Print the address with %p and a void * argument; %lx does not match a pointer argument. | |||
| printf("error code=%d,\tmap_address=%p\n",errsv,(void *)map_address); | |||
| } | |||
| #else | |||
| my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); | |||
| #endif | |||
| #endif | |||
| allocsize = DGEMM_P * DGEMM_Q * sizeof(double); | |||
| start = (BLASULONG)map_address; | |||
| @@ -979,7 +999,7 @@ void *blas_memory_alloc(int procpos){ | |||
| memory[position].addr = map_address; | |||
| #ifdef DEBUG | |||
| printf(" Mapping Succeeded. %p(%d)\n", (void *)alloc_area[position], position); | |||
| printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position); | |||
| #endif | |||
| } | |||
| @@ -1010,7 +1030,7 @@ void *blas_memory_alloc(int procpos){ | |||
| #ifdef DEBUG | |||
| printf("Mapped : %p %3d\n\n", | |||
| (void *)alloc_area[position], position); | |||
| (void *)memory[position].addr, position); | |||
| #endif | |||
| return (void *)memory[position].addr; | |||
| @@ -1053,7 +1073,7 @@ void blas_memory_free(void *free_area){ | |||
| #ifdef DEBUG | |||
| for (position = 0; position < NUM_BUFFERS; position++) | |||
| printf("%4ld %p : %d\n", position, alloc_area[position], alloc_used[position]); | |||
| printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used); | |||
| #endif | |||
| return; | |||
| @@ -81,6 +81,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc | |||
| #ifdef SMP | |||
| nthreads = num_cpu_avail(1); | |||
| //Disable multi-threading when incx==0 or incy==0: | |||
| //with a zero stride the work split across threads would not be independent. | |||
| if (incx == 0 || incy == 0) | |||
| nthreads = 1; | |||
| if (nthreads == 1) { | |||
| #endif | |||
| @@ -78,7 +78,12 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ | |||
| #ifdef SMP | |||
| nthreads = num_cpu_avail(1); | |||
| //Disable multi-threading when incx==0 or incy==0: | |||
| //with a zero stride the work split across threads would not be independent. | |||
| if (incx == 0 || incy == 0) | |||
| nthreads = 1; | |||
| if (nthreads == 1) { | |||
| #endif | |||
| @@ -83,6 +83,11 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in | |||
| #ifdef SMP | |||
| nthreads = num_cpu_avail(1); | |||
| //Disable multi-threading when incx==0 or incy==0: | |||
| //with a zero stride the work split across threads would not be independent. | |||
| if (incx == 0 || incy == 0) | |||
| nthreads = 1; | |||
| if (nthreads == 1) { | |||
| #endif | |||
| @@ -80,6 +80,11 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ | |||
| #ifdef SMP | |||
| nthreads = num_cpu_avail(1); | |||
| //Disable multi-threading when incx==0 or incy==0: | |||
| //with a zero stride the work split across threads would not be independent. | |||
| if (incx == 0 || incy == 0) | |||
| nthreads = 1; | |||
| if (nthreads == 1) { | |||
| #endif | |||
| @@ -1440,6 +1440,12 @@ | |||
| .L50: | |||
| movl M, %eax | |||
| movl Y, YY | |||
| //If incx==0 || incy==0, skip the unrolled loop and jump to .L56. | |||
| cmpl $0, INCX | |||
| je .L56 | |||
| cmpl $0, INCY | |||
| je .L56 | |||
| sarl $3, %eax | |||
| jle .L55 | |||
| ALIGN_3 | |||
| @@ -698,6 +698,12 @@ | |||
| .L40: | |||
| movl Y, YY | |||
| movl M, %eax | |||
| //If incx==0 || incy==0, skip the unrolled loop and jump to .L46. | |||
| cmpl $0, INCX | |||
| je .L46 | |||
| cmpl $0, INCY | |||
| je .L46 | |||
| sarl $3, %eax | |||
| jle .L45 | |||
| ALIGN_3 | |||
| @@ -859,6 +859,10 @@ | |||
| .L50: | |||
| movl N, I | |||
| cmpl $0, INCX | |||
| je .L56 | |||
| cmpl $0, INCY | |||
| je .L56 | |||
| sarl $2, I | |||
| jle .L55 | |||
| ALIGN_3 | |||
| @@ -2857,6 +2857,11 @@ | |||
| unpcklps ALPHA_I, ALPHA_R | |||
| unpcklps %xmm5, ALPHA_I | |||
| #endif | |||
| //If incx==0 || incy==0, skip the unrolled loops and jump to the element-by-element loop at .L200. | |||
| cmpl $0, INCX | |||
| je .L200 | |||
| cmpl $0, INCY | |||
| je .L200 | |||
| movl Y, YY | |||
| @@ -3090,8 +3095,41 @@ | |||
| addps %xmm1, %xmm4 | |||
| movsd %xmm4, (Y) | |||
| jmp .L999 | |||
| ALIGN_3 | |||
| //Element-by-element path reached from the incx==0 / incy==0 checks. | |||
| //Each pass loads one complex element from (X), multiplies it by ALPHA_R/ALPHA_I, | |||
| //adds both products to the element at (Y) and stores it back; M passes in total. | |||
| //NOTE(review): assumes INCX/INCY already hold byte strides at this point, as the | |||
| //matching "addq INCX, X" / "addq INCY, Y" in the x86_64 version of this loop | |||
| //suggests - confirm against the function prologue. | |||
| .L200: | |||
| movl M, %eax | |||
| cmpl $0, %eax | |||
| jle .L999 | |||
| ALIGN_3 | |||
| .L201: | |||
| movsd (X), %xmm0 | |||
| //Advance X by its stride (adds 0 when incx==0). | |||
| addl INCX, X | |||
| #ifdef HAVE_SSE3 | |||
| movshdup %xmm0, %xmm1 | |||
| movsldup %xmm0, %xmm0 | |||
| #else | |||
| movaps %xmm0, %xmm1 | |||
| shufps $0xa0, %xmm0, %xmm0 | |||
| shufps $0xf5, %xmm1, %xmm1 | |||
| #endif | |||
| mulps ALPHA_R, %xmm0 | |||
| mulps ALPHA_I, %xmm1 | |||
| movsd (Y), %xmm4 | |||
| addps %xmm0, %xmm4 | |||
| addps %xmm1, %xmm4 | |||
| movsd %xmm4, (Y) | |||
| //Advance Y by its stride (adds 0 when incy==0). Without this step a call with | |||
| //only one increment equal to zero would update the same y element M times. | |||
| addl INCY, Y | |||
| decl %eax | |||
| jg .L201 | |||
| ALIGN_3 | |||
| .L999: | |||
| popl %ebp | |||
| popl %ebx | |||
| @@ -1318,6 +1318,12 @@ | |||
| movl Y, YY | |||
| movl M, %eax | |||
| //If incx==0 || incy==0, skip the unrolled loops and jump to the single-element loop at .L58. | |||
| cmpl $0, INCX | |||
| je .L58 | |||
| cmpl $0, INCY | |||
| je .L58 | |||
| sarl $2, %eax | |||
| jle .L55 | |||
| @@ -1498,6 +1504,7 @@ | |||
| andl $1, %eax | |||
| jle .L999 | |||
| .L58: | |||
| MOVDDUP( 0 * SIZE, X, %xmm0) | |||
| MOVDDUP( 1 * SIZE, X, %xmm1) | |||
| @@ -1510,6 +1517,10 @@ | |||
| movlpd %xmm4, 0 * SIZE(YY) | |||
| movhpd %xmm4, 1 * SIZE(YY) | |||
| decl %eax | |||
| jg .L58 | |||
| ALIGN_3 | |||
| .L999: | |||
| @@ -1285,6 +1285,12 @@ | |||
| .L50: | |||
| movl N, I | |||
| //if incx ==0 || incy==0 jump to the tail | |||
| cmpl $0, INCX | |||
| je .L56 | |||
| cmpl $0, INCY | |||
| je .L56 | |||
| sarl $2, I | |||
| jle .L55 | |||
| ALIGN_3 | |||
| @@ -1463,6 +1463,12 @@ | |||
| .L50: | |||
| movq M, %rax | |||
| movq Y, YY | |||
| //If incx==0 || incy==0, skip the unrolled loop and jump to .L56. | |||
| cmpq $0, INCX | |||
| je .L56 | |||
| cmpq $0, INCY | |||
| je .L56 | |||
| sarq $3, %rax | |||
| jle .L55 | |||
| ALIGN_3 | |||
| @@ -805,6 +805,12 @@ | |||
| .L40: | |||
| movq Y, YY | |||
| movq M, %rax | |||
| //If incx==0 || incy==0, skip the unrolled loop and jump to .L46. | |||
| cmpq $0, INCX | |||
| je .L46 | |||
| cmpq $0, INCY | |||
| je .L46 | |||
| sarq $3, %rax | |||
| jle .L45 | |||
| ALIGN_3 | |||
| @@ -887,6 +887,10 @@ | |||
| .L50: | |||
| movq N, %rax | |||
| cmpq $0, INCX | |||
| je .L56 | |||
| cmpq $0, INCY | |||
| je .L56 | |||
| sarq $2, %rax | |||
| jle .L55 | |||
| ALIGN_3 | |||
| @@ -2893,6 +2893,12 @@ | |||
| unpcklps %xmm13, %xmm15 | |||
| #endif | |||
| //If incx==0 || incy==0, skip the unrolled loops and jump to the element-by-element loop at .L200. | |||
| cmpq $0, INCX | |||
| je .L200 | |||
| cmpq $0, INCY | |||
| je .L200 | |||
| movq Y, YY | |||
| movq M, %rax | |||
| @@ -3105,8 +3111,42 @@ | |||
| addps %xmm1, %xmm8 | |||
| movsd %xmm8, (Y) | |||
| jmp .L999 | |||
| ALIGN_3 | |||
| .L200: | |||
| movq M, %rax | |||
| cmpq $0, %rax | |||
| jle .L999 | |||
| ALIGN_3 | |||
| .L201: | |||
| movsd (X), %xmm0 | |||
| addq INCX, X | |||
| #ifdef HAVE_SSE3 | |||
| movshdup %xmm0, %xmm1 | |||
| movsldup %xmm0, %xmm0 | |||
| #else | |||
| pshufd $0xf5, %xmm0, %xmm1 | |||
| shufps $0xa0, %xmm0, %xmm0 | |||
| #endif | |||
| mulps %xmm14, %xmm0 | |||
| mulps %xmm15, %xmm1 | |||
| movsd (Y), %xmm8 | |||
| addps %xmm0, %xmm8 | |||
| addps %xmm1, %xmm8 | |||
| movsd %xmm8, (Y) | |||
| addq INCY, Y | |||
| decq %rax | |||
| jg .L201 | |||
| ALIGN_3 | |||
| .L999: | |||
| xorq %rax, %rax | |||
| @@ -1416,6 +1416,12 @@ | |||
| movq Y, YY | |||
| movq M, %rax | |||
| //If incx==0 || incy==0, skip the unrolled loops and jump to the single-element loop at .L58. | |||
| cmpq $0, INCX | |||
| je .L58 | |||
| cmpq $0, INCY | |||
| je .L58 | |||
| sarq $3, %rax | |||
| jle .L55 | |||
| @@ -1769,6 +1775,7 @@ | |||
| andq $1, %rax | |||
| jle .L999 | |||
| .L58: | |||
| MOVDDUP( 0 * SIZE, X, %xmm0) | |||
| MOVDDUP( 1 * SIZE, X, %xmm1) | |||
| @@ -1781,6 +1788,9 @@ | |||
| movlpd %xmm8, 0 * SIZE(YY) | |||
| movhpd %xmm8, 1 * SIZE(YY) | |||
| decq %rax | |||
| jg .L58 | |||
| ALIGN_3 | |||
| .L999: | |||
| @@ -1523,6 +1523,10 @@ | |||
| .L50: | |||
| movq N, %rax | |||
| cmpq $0, INCX | |||
| je .L56 | |||
| cmpq $0, INCY | |||
| je .L56 | |||
| sarq $2, %rax | |||
| jle .L55 | |||
| ALIGN_3 | |||
| @@ -138,7 +138,8 @@ DBLASOBJS += \ | |||
| dpotf2f.$(SUFFIX) dpotrff.$(SUFFIX) dtrti2f.$(SUFFIX) dtrtrif.$(SUFFIX) \ | |||
| dlaswpf.$(SUFFIX) dgetrsf.$(SUFFIX) dgesvf.$(SUFFIX) dpotrif.$(SUFFIX) \ | |||
| QBLASOBJS += \ | |||
| QBLASOBJS += | |||
| # \ | |||
| qgetf2f.$(SUFFIX) qgetrff.$(SUFFIX) qlauu2f.$(SUFFIX) qlauumf.$(SUFFIX) \ | |||
| qpotf2f.$(SUFFIX) qpotrff.$(SUFFIX) qtrti2f.$(SUFFIX) qtrtrif.$(SUFFIX) \ | |||
| qlaswpf.$(SUFFIX) qgetrsf.$(SUFFIX) qgesvf.$(SUFFIX) qpotrif.$(SUFFIX) \ | |||
| @@ -153,7 +154,8 @@ ZBLASOBJS += \ | |||
| zpotf2f.$(SUFFIX) zpotrff.$(SUFFIX) ztrti2f.$(SUFFIX) ztrtrif.$(SUFFIX) \ | |||
| zlaswpf.$(SUFFIX) zgetrsf.$(SUFFIX) zgesvf.$(SUFFIX) zpotrif.$(SUFFIX) \ | |||
| XBLASOBJS += \ | |||
| XBLASOBJS += | |||
| # \ | |||
| xgetf2f.$(SUFFIX) xgetrff.$(SUFFIX) xlauu2f.$(SUFFIX) xlauumf.$(SUFFIX) \ | |||
| xpotf2f.$(SUFFIX) xpotrff.$(SUFFIX) xtrti2f.$(SUFFIX) xtrtrif.$(SUFFIX) \ | |||
| xlaswpf.$(SUFFIX) xgetrsf.$(SUFFIX) xgesvf.$(SUFFIX) xpotrif.$(SUFFIX) \ | |||
| @@ -5,12 +5,12 @@ include $(TOPDIR)/Makefile.system | |||
| TARGET=openblas_utest | |||
| CUNIT_LIB=/usr/local/lib/libcunit.a | |||
| OBJS=main.o test_rot.o | |||
| OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o | |||
| all : run_test | |||
| $(TARGET): $(OBJS) | |||
| $(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) | |||
| $(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB) | |||
| run_test: $(TARGET) | |||
| ./$(TARGET) | |||
| @@ -36,9 +36,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include <common.h> | |||
| #define CHECK_EPS 0.0002 | |||
| #define CHECK_EPS 0.00002 | |||
| //Testcase list | |||
| void test_drot_incx_0(void); | |||
| void test_drot_inc_0(void); | |||
| void test_srot_inc_0(void); | |||
| void test_zdrot_inc_0(void); | |||
| void test_csrot_inc_0(void); | |||
| void test_dswap_inc_0(void); | |||
| void test_zswap_inc_0(void); | |||
| void test_sswap_inc_0(void); | |||
| void test_cswap_inc_0(void); | |||
| void test_daxpy_inc_0(void); | |||
| void test_zaxpy_inc_0(void); | |||
| void test_saxpy_inc_0(void); | |||
| void test_caxpy_inc_0(void); | |||
| void test_zdotu_n_1(void); | |||
| #endif | |||
| @@ -33,12 +33,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include <stdio.h> | |||
| #include <string.h> | |||
| #include "common_utest.h" | |||
| #include <CUnit/Basic.h> | |||
| //Table of level-1 BLAS test cases: one {description, test function} entry per case, | |||
| //terminated by CU_TEST_INFO_NULL. | |||
| //The drot case is registered once, under its current name test_drot_inc_0. | |||
| CU_TestInfo test_level1[]={ | |||
| {"Testing srot when incx || incy == 0",test_srot_inc_0}, | |||
| {"Testing drot when incx || incy == 0",test_drot_inc_0}, | |||
| {"Testing csrot when incx || incy == 0",test_csrot_inc_0}, | |||
| {"Testing zdrot when incx || incy == 0",test_zdrot_inc_0}, | |||
| {"Testing sswap with incx || incy == 0",test_sswap_inc_0}, | |||
| {"Testing dswap with incx || incy == 0",test_dswap_inc_0}, | |||
| {"Testing cswap with incx || incy == 0",test_cswap_inc_0}, | |||
| {"Testing zswap with incx || incy == 0",test_zswap_inc_0}, | |||
| {"Testing saxpy with incx || incy == 0",test_saxpy_inc_0}, | |||
| {"Testing daxpy with incx || incy == 0",test_daxpy_inc_0}, | |||
| {"Testing caxpy with incx || incy == 0",test_caxpy_inc_0}, | |||
| {"Testing zaxpy with incx || incy == 0",test_zaxpy_inc_0}, | |||
| {"Testing zdotu with n == 1",test_zdotu_n_1}, | |||
| CU_TEST_INFO_NULL, | |||
| }; | |||
| @@ -64,7 +78,9 @@ int main() | |||
| printf("Seting OK\n"); | |||
| fflush(stdout); | |||
| /* Run all tests using the CUnit Basic interface */ | |||
| CU_basic_set_mode(CU_BRM_VERBOSE); | |||
| @@ -0,0 +1,117 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| #include "common_utest.h" | |||
| //Checks that OpenBLAS daxpy gives the same result as the reference daxpy when | |||
| //incx and incy are both 0. Both routines run on identical copies of x and y; | |||
| //afterwards every element of x and y must agree within CHECK_EPS. | |||
| void test_daxpy_inc_0(void) | |||
| { | |||
| int i; | |||
| //blasint rather than int: the BLAS prototypes take blasint pointers for n and the increments. | |||
| blasint N=8,incX=0,incY=0; | |||
| double a=0.25; | |||
| double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; | |||
| double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; | |||
| double y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| //OpenBLAS | |||
| BLASFUNC(daxpy)(&N,&a,x1,&incX,y1,&incY); | |||
| //reference | |||
| BLASFUNC_REF(daxpy)(&N,&a,x2,&incX,y2,&incY); | |||
| for(i=0; i<N; i++){ | |||
| CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS); | |||
| } | |||
| } | |||
| //Checks that OpenBLAS zaxpy gives the same result as the reference zaxpy when | |||
| //incx and incy are both 0. The arrays hold N complex values as 2*N doubles, | |||
| //so the comparison loop covers 2*N entries. | |||
| void test_zaxpy_inc_0(void) | |||
| { | |||
| int i; | |||
| //blasint rather than int: the BLAS prototypes take blasint pointers for n and the increments. | |||
| blasint N=4,incX=0,incY=0; | |||
| //alpha as a {real, imaginary} pair | |||
| double a[2]={0.25,0.5}; | |||
| double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; | |||
| double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; | |||
| double y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| //OpenBLAS | |||
| BLASFUNC(zaxpy)(&N,a,x1,&incX,y1,&incY); | |||
| //reference | |||
| BLASFUNC_REF(zaxpy)(&N,a,x2,&incX,y2,&incY); | |||
| for(i=0; i<2*N; i++){ | |||
| CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS); | |||
| } | |||
| } | |||
| //Checks that OpenBLAS saxpy gives the same result as the reference saxpy when | |||
| //incx and incy are both 0. Both routines run on identical copies of x and y; | |||
| //afterwards every element of x and y must agree within CHECK_EPS. | |||
| void test_saxpy_inc_0(void) | |||
| { | |||
| int i; | |||
| //blasint rather than int: the BLAS prototypes take blasint pointers for n and the increments. | |||
| blasint N=8,incX=0,incY=0; | |||
| float a=0.25; | |||
| float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; | |||
| float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| float x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; | |||
| float y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| //OpenBLAS | |||
| BLASFUNC(saxpy)(&N,&a,x1,&incX,y1,&incY); | |||
| //reference | |||
| BLASFUNC_REF(saxpy)(&N,&a,x2,&incX,y2,&incY); | |||
| for(i=0; i<N; i++){ | |||
| CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS); | |||
| } | |||
| } | |||
| //Checks that OpenBLAS caxpy gives the same result as the reference caxpy when | |||
| //incx and incy are both 0. The arrays hold N complex values as 2*N floats, | |||
| //so the comparison loop covers 2*N entries. | |||
| void test_caxpy_inc_0(void) | |||
| { | |||
| int i; | |||
| //blasint rather than int: the BLAS prototypes take blasint pointers for n and the increments. | |||
| blasint N=4,incX=0,incY=0; | |||
| //alpha as a {real, imaginary} pair | |||
| float a[2]={0.25,0.5}; | |||
| float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; | |||
| float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| float x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; | |||
| float y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| //OpenBLAS | |||
| BLASFUNC(caxpy)(&N,a,x1,&incX,y1,&incY); | |||
| //reference | |||
| BLASFUNC_REF(caxpy)(&N,a,x2,&incX,y2,&incY); | |||
| for(i=0; i<2*N; i++){ | |||
| CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS); | |||
| } | |||
| } | |||
| @@ -0,0 +1,56 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| #include "common_utest.h" | |||
| #include <complex.h> | |||
| //Checks that OpenBLAS zdotu gives the same result as the reference zdotu for | |||
| //a single complex element (n == 1, unit increments). The real and imaginary | |||
| //parts of the two results must agree within CHECK_EPS. | |||
| void test_zdotu_n_1(void) | |||
| { | |||
| //blasint rather than int: the BLAS prototypes take blasint pointers for n and the increments. | |||
| blasint N=1,incX=1,incY=1; | |||
| double x1[]={1.0,1.0}; | |||
| double y1[]={1.0,2.0}; | |||
| double x2[]={1.0,1.0}; | |||
| double y2[]={1.0,2.0}; | |||
| double _Complex result1=0.0; | |||
| double _Complex result2=0.0; | |||
| //OpenBLAS | |||
| result1=BLASFUNC(zdotu)(&N,x1,&incX,y1,&incY); | |||
| //reference | |||
| result2=BLASFUNC_REF(zdotu)(&N,x2,&incX,y2,&incY); | |||
| CU_ASSERT_DOUBLE_EQUAL(creal(result1), creal(result2), CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(cimag(result1), cimag(result2), CHECK_EPS); | |||
| } | |||
| @@ -32,9 +32,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include "common_utest.h" | |||
| void test_drot_incx_0(void) | |||
| void test_drot_inc_0(void) | |||
| { | |||
| int i; | |||
| int i=0; | |||
| int N=4,incX=0,incY=0; | |||
| double c=0.25,s=0.5; | |||
| double x1[]={1.0,3.0,5.0,7.0}; | |||
| @@ -43,12 +43,75 @@ void test_drot_incx_0(void) | |||
| double y2[]={2.0,4.0,6.0,8.0}; | |||
| //OpenBLAS | |||
| drot_(&N,x1,&incX,y1,&incY,&c,&s); | |||
| BLASFUNC(drot)(&N,x1,&incX,y1,&incY,&c,&s); | |||
| //reference | |||
| drotf_(&N,x2,&incX,y2,&incY,&c,&s); | |||
| BLASFUNC_REF(drot)(&N,x2,&incX,y2,&incY,&c,&s); | |||
| for(i=0; i<N; i++){ | |||
| CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS); | |||
| } | |||
| } | |||
| //Checks that OpenBLAS zdrot gives the same result as the reference zdrot when | |||
| //incx and incy are both 0. The arrays hold N complex values as 2*N doubles, | |||
| //so the comparison loop covers 2*N entries. | |||
| void test_zdrot_inc_0(void) | |||
| { | |||
| int i=0; | |||
| //blasint rather than int: the BLAS prototypes take blasint pointers for n and the increments. | |||
| blasint N=4,incX=0,incY=0; | |||
| double c=0.25,s=0.5; | |||
| double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; | |||
| double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; | |||
| double y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| //OpenBLAS | |||
| BLASFUNC(zdrot)(&N,x1,&incX,y1,&incY,&c,&s); | |||
| //reference | |||
| BLASFUNC_REF(zdrot)(&N,x2,&incX,y2,&incY,&c,&s); | |||
| for(i=0; i<2*N; i++){ | |||
| CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS); | |||
| } | |||
| } | |||
| //Checks that OpenBLAS srot gives the same result as the reference srot when | |||
| //incx and incy are both 0. Both routines run on identical copies of x and y; | |||
| //afterwards every element of x and y must agree within CHECK_EPS. | |||
| void test_srot_inc_0(void) | |||
| { | |||
| int i=0; | |||
| //blasint rather than int: the BLAS prototypes take blasint pointers for n and the increments. | |||
| blasint N=4,incX=0,incY=0; | |||
| float c=0.25,s=0.5; | |||
| float x1[]={1.0,3.0,5.0,7.0}; | |||
| float y1[]={2.0,4.0,6.0,8.0}; | |||
| float x2[]={1.0,3.0,5.0,7.0}; | |||
| float y2[]={2.0,4.0,6.0,8.0}; | |||
| //OpenBLAS | |||
| BLASFUNC(srot)(&N,x1,&incX,y1,&incY,&c,&s); | |||
| //reference | |||
| BLASFUNC_REF(srot)(&N,x2,&incX,y2,&incY,&c,&s); | |||
| for(i=0; i<N; i++){ | |||
| CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS); | |||
| } | |||
| } | |||
| void test_csrot_inc_0(void) /* CUnit case: csrot called with incX == incY == 0 must leave x and y equal to what the reference routine produces. */ | |||
| { | |||
| int i=0; | |||
| int N=4,incX=0,incY=0; /* both strides are zero: this is the case under test */ | |||
| float c=0.25,s=0.5; /* rotation parameters, passed unchanged to both calls */ | |||
| float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; /* 2*N floats; presumably N interleaved (re,im) pairs -- TODO confirm */ | |||
| float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| float x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; /* x2/y2 start identical to x1/y1 and are handed to the reference call */ | |||
| float y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| //OpenBLAS: routine under test, given x1/y1 | |||
| BLASFUNC(csrot)(&N,x1,&incX,y1,&incY,&c,&s); | |||
| //reference: same arguments, given x2/y2 | |||
| BLASFUNC_REF(csrot)(&N,x2,&incX,y2,&incY,&c,&s); | |||
| for(i=0; i<2*N; i++){ /* compare every stored float of both vectors via the double-comparison macro */ | |||
| CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS); | |||
| } | |||
| } | |||
| @@ -0,0 +1,113 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011, Lab of Parallel Software and Computational Science, ISCAS | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the ISCAS nor the names of its contributors may | |||
| be used to endorse or promote products derived from this software | |||
| without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| #include "common_utest.h" | |||
| void test_dswap_inc_0(void) /* CUnit case: dswap called with incX == incY == 0 must leave x and y equal to what the reference routine produces. */ | |||
| { | |||
| int i=0; | |||
| int N=4,incX=0,incY=0; /* both strides are zero: this is the case under test */ | |||
| double x1[]={1.0,3.0,5.0,7.0}; /* x1/y1 go to the routine under test */ | |||
| double y1[]={2.0,4.0,6.0,8.0}; | |||
| double x2[]={1.0,3.0,5.0,7.0}; /* x2/y2 start identical to x1/y1 and are handed to the reference call */ | |||
| double y2[]={2.0,4.0,6.0,8.0}; | |||
| //OpenBLAS: routine under test, given x1/y1 | |||
| BLASFUNC(dswap)(&N,x1,&incX,y1,&incY); | |||
| //reference: same arguments, given x2/y2 | |||
| BLASFUNC_REF(dswap)(&N,x2,&incX,y2,&incY); | |||
| for(i=0; i<N; i++){ /* element-wise comparison of both vectors within CHECK_EPS */ | |||
| CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS); | |||
| } | |||
| } | |||
| void test_zswap_inc_0(void) /* CUnit case: zswap called with incX == incY == 0 must leave x and y equal to what the reference routine produces. */ | |||
| { | |||
| int i=0; | |||
| int N=4,incX=0,incY=0; /* both strides are zero: this is the case under test */ | |||
| double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; /* 2*N doubles; presumably N interleaved (re,im) pairs -- TODO confirm */ | |||
| double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; /* x2/y2 start identical to x1/y1 and are handed to the reference call */ | |||
| double y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| //OpenBLAS: routine under test, given x1/y1 | |||
| BLASFUNC(zswap)(&N,x1,&incX,y1,&incY); | |||
| //reference: same arguments, given x2/y2 | |||
| BLASFUNC_REF(zswap)(&N,x2,&incX,y2,&incY); | |||
| for(i=0; i<2*N; i++){ /* compare every stored double of both vectors */ | |||
| CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS); | |||
| } | |||
| } | |||
| void test_sswap_inc_0(void) /* CUnit case: sswap called with incX == incY == 0 must leave x and y equal to what the reference routine produces. */ | |||
| { | |||
| int i=0; | |||
| int N=4,incX=0,incY=0; /* both strides are zero: this is the case under test */ | |||
| float x1[]={1.0,3.0,5.0,7.0}; /* x1/y1 go to the routine under test */ | |||
| float y1[]={2.0,4.0,6.0,8.0}; | |||
| float x2[]={1.0,3.0,5.0,7.0}; /* x2/y2 start identical to x1/y1 and are handed to the reference call */ | |||
| float y2[]={2.0,4.0,6.0,8.0}; | |||
| //OpenBLAS: routine under test, given x1/y1 | |||
| BLASFUNC(sswap)(&N,x1,&incX,y1,&incY); | |||
| //reference: same arguments, given x2/y2 | |||
| BLASFUNC_REF(sswap)(&N,x2,&incX,y2,&incY); | |||
| for(i=0; i<N; i++){ /* float elements are checked with the double-comparison macro */ | |||
| CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS); | |||
| } | |||
| } | |||
| void test_cswap_inc_0(void) /* CUnit case: cswap called with incX == incY == 0 must leave x and y equal to what the reference routine produces. */ | |||
| { | |||
| int i=0; | |||
| int N=4,incX=0,incY=0; /* both strides are zero: this is the case under test */ | |||
| float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; /* 2*N floats; presumably N interleaved (re,im) pairs -- TODO confirm */ | |||
| float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| float x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0}; /* x2/y2 start identical to x1/y1 and are handed to the reference call */ | |||
| float y2[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0}; | |||
| //OpenBLAS: routine under test, given x1/y1 | |||
| BLASFUNC(cswap)(&N,x1,&incX,y1,&incY); | |||
| //reference: same arguments, given x2/y2 | |||
| BLASFUNC_REF(cswap)(&N,x2,&incX,y2,&incY); | |||
| for(i=0; i<2*N; i++){ /* compare every stored float of both vectors via the double-comparison macro */ | |||
| CU_ASSERT_DOUBLE_EQUAL(x1[i], x2[i], CHECK_EPS); | |||
| CU_ASSERT_DOUBLE_EQUAL(y1[i], y2[i], CHECK_EPS); | |||
| } | |||
| } | |||