| @@ -121,6 +121,11 @@ In chronological order: | |||||
| * [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1). | * [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1). | ||||
| ARMv8 support. | ARMv8 support. | ||||
| * Jerome Robert <jeromerobert@gmx.com> | |||||
| * [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478) | |||||
| * [2015-12-23] `stack_check` in `gemv.c` (bug #722) | |||||
| * [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731) | |||||
| * Dan Kortschak | * Dan Kortschak | ||||
| * [2015-01-07] Added test for drotmg bug #484. | * [2015-01-07] Added test for drotmg bug #484. | ||||
| @@ -174,8 +174,11 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| STACK_ALLOC(m, FLOAT, buffer); | STACK_ALLOC(m, FLOAT, buffer); | ||||
| #ifdef SMPTEST | #ifdef SMPTEST | ||||
| nthreads = num_cpu_avail(2); | |||||
| // Threshold chosen so that speed-up is > 1 on a Xeon E5-2630 | |||||
| if(1L * m * n > 24L * GEMM_MULTITHREAD_THRESHOLD) | |||||
| nthreads = num_cpu_avail(2); | |||||
| else | |||||
| nthreads = 1; | |||||
| if (nthreads == 1) { | if (nthreads == 1) { | ||||
| #endif | #endif | ||||
| @@ -77,12 +77,13 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ | |||||
| if (incy < 0) y -= (n - 1) * incy; | if (incy < 0) y -= (n - 1) * incy; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| nthreads = num_cpu_avail(1); | |||||
| //disable multi-thread when incx==0 or incy==0 | //disable multi-thread when incx==0 or incy==0 | ||||
| //In that case, the threads would be dependent. | //In that case, the threads would be dependent. | ||||
| if (incx == 0 || incy == 0) | |||||
| nthreads = 1; | |||||
| if (incx == 0 || incy == 0 || n < 2097152 * GEMM_MULTITHREAD_THRESHOLD / sizeof(FLOAT)) | |||||
| nthreads = 1; | |||||
| else | |||||
| nthreads = num_cpu_avail(1); | |||||
| if (nthreads == 1) { | if (nthreads == 1) { | ||||
| #endif | #endif | ||||
| @@ -213,7 +213,11 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| buffer = (FLOAT *)blas_memory_alloc(1); | buffer = (FLOAT *)blas_memory_alloc(1); | ||||
| #ifdef SMPTEST | #ifdef SMPTEST | ||||
| nthreads = num_cpu_avail(2); | |||||
| // Threshold chosen so that speed-up is > 1 on a Xeon E5-2630 | |||||
| if(1L * m * n > 3L * sizeof(FLOAT) * GEMM_MULTITHREAD_THRESHOLD) | |||||
| nthreads = num_cpu_avail(2); | |||||
| else | |||||
| nthreads = 1; | |||||
| if (nthreads == 1) { | if (nthreads == 1) { | ||||
| #endif | #endif | ||||