Increase BUFFER_SIZEs and add a safeguard; supply GEMM_R for POWER8/9 (tags/v0.3.10^2)
| @@ -121,7 +121,7 @@ REALNAME: | |||||
| #endif | #endif | ||||
| #define HUGE_PAGESIZE ( 4 << 20) | #define HUGE_PAGESIZE ( 4 << 20) | ||||
| #define BUFFER_SIZE (16 << 20) | |||||
| #define BUFFER_SIZE (32 << 20) | |||||
| #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) | #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) | ||||
| @@ -141,12 +141,17 @@ REALNAME: | |||||
| #endif | #endif | ||||
| #define HUGE_PAGESIZE ( 4 << 20) | #define HUGE_PAGESIZE ( 4 << 20) | ||||
| #ifndef BUFFERSIZE | |||||
| #if defined(CORTEXA57) | #if defined(CORTEXA57) | ||||
| #define BUFFER_SIZE (20 << 20) | #define BUFFER_SIZE (20 << 20) | ||||
| #elif defined(TSV110) || defined(EMAG8180) | |||||
| #define BUFFER_SIZE (32 << 20) | |||||
| #else | #else | ||||
| #define BUFFER_SIZE (16 << 20) | #define BUFFER_SIZE (16 << 20) | ||||
| #endif | #endif | ||||
| #else | |||||
| #define BUFFER_SIZE (32 << BUFFERSIZE) | |||||
| #endif | |||||
| #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) | #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) | ||||
| @@ -226,7 +226,13 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ | |||||
| #define HUGE_PAGESIZE ( 2 << 20) | #define HUGE_PAGESIZE ( 2 << 20) | ||||
| #ifndef BUFFERSIZE | #ifndef BUFFERSIZE | ||||
| #if defined(SKYLAKEX) | |||||
| #define BUFFER_SIZE (32 << 21) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #define BUFFER_SIZE (32 << 22) | |||||
| #else | |||||
| #define BUFFER_SIZE (32 << 20) | #define BUFFER_SIZE (32 << 20) | ||||
| #endif | |||||
| #else | #else | ||||
| #define BUFFER_SIZE (32 << BUFFERSIZE) | #define BUFFER_SIZE (32 << BUFFERSIZE) | ||||
| #endif | #endif | ||||
| @@ -123,11 +123,7 @@ REALNAME: | |||||
| #endif | #endif | ||||
| #define HUGE_PAGESIZE ( 4 << 20) | #define HUGE_PAGESIZE ( 4 << 20) | ||||
| #if defined(CORTEXA57) | |||||
| #define BUFFER_SIZE (20 << 20) | |||||
| #else | |||||
| #define BUFFER_SIZE (16 << 20) | |||||
| #endif | |||||
| #define BUFFER_SIZE (32 << 22) | |||||
| #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) | #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) | ||||
| @@ -87,6 +87,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| /* Memory buffer must fit two matrix subblocks of maximal size */ | |||||
| #define XSTR(x) STR(x) | |||||
| #define STR(x) #x | |||||
| #if BUFFER_SIZE < (SGEMM_DEFAULT_P * SGEMM_DEFAULT_Q * 4 * 2) || \ | |||||
| BUFFER_SIZE < (SGEMM_DEFAULT_P * SGEMM_DEFAULT_R * 4 * 2) || \ | |||||
| BUFFER_SIZE < (SGEMM_DEFAULT_R * SGEMM_DEFAULT_Q * 4 * 2) | |||||
| #warning BUFFER_SIZE is too small for P, Q, and R of SGEMM - large calculations may crash ! | |||||
| #endif | |||||
| #if BUFFER_SIZE < (DGEMM_DEFAULT_P * DGEMM_DEFAULT_Q * 8 * 2) || \ | |||||
| BUFFER_SIZE < (DGEMM_DEFAULT_P * DGEMM_DEFAULT_R * 8 * 2) || \ | |||||
| BUFFER_SIZE < (DGEMM_DEFAULT_R * DGEMM_DEFAULT_Q * 8 * 2) | |||||
| #warning BUFFER_SIZE is too small for P, Q, and R of DGEMM - large calculations may crash ! | |||||
| #endif | |||||
| #if BUFFER_SIZE < (CGEMM_DEFAULT_P * CGEMM_DEFAULT_Q * 8 * 2) || \ | |||||
| BUFFER_SIZE < (CGEMM_DEFAULT_P * CGEMM_DEFAULT_R * 8 * 2) || \ | |||||
| BUFFER_SIZE < (CGEMM_DEFAULT_R * CGEMM_DEFAULT_Q * 8 * 2) | |||||
| #warning BUFFER_SIZE is too small for P, Q, and R of CGEMM - large calculations may crash ! | |||||
| #endif | |||||
| #if BUFFER_SIZE < (ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_Q * 16 * 2) || \ | |||||
| BUFFER_SIZE < (ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_R * 16 * 2) || \ | |||||
| BUFFER_SIZE < (ZGEMM_DEFAULT_R * ZGEMM_DEFAULT_Q * 16 * 2) | |||||
| #warning BUFFER_SIZE is too small for P, Q, and R of ZGEMM - large calculations may crash ! | |||||
| #endif | |||||
| #if defined(COMPILE_TLS) | #if defined(COMPILE_TLS) | ||||
| #include <errno.h> | #include <errno.h> | ||||
| @@ -2740,7 +2764,7 @@ void *blas_memory_alloc(int procpos){ | |||||
| #ifdef DEBUG | #ifdef DEBUG | ||||
| printf(" Position -> %d\n", position); | printf(" Position -> %d\n", position); | ||||
| #endif | #endif | ||||
| WMB; | |||||
| memory[position].used = 1; | memory[position].used = 1; | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | ||||
| UNLOCK_COMMAND(&alloc_lock); | UNLOCK_COMMAND(&alloc_lock); | ||||
| @@ -2229,15 +2229,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 8 | #define ZGEMM_DEFAULT_UNROLL_M 8 | ||||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | #define ZGEMM_DEFAULT_UNROLL_N 2 | ||||
| #define SGEMM_DEFAULT_P 1280 | |||||
| #define DGEMM_DEFAULT_P 640 | |||||
| #define CGEMM_DEFAULT_P 640 | |||||
| #define ZGEMM_DEFAULT_P 320 | |||||
| #define SGEMM_DEFAULT_Q 640 | |||||
| #define DGEMM_DEFAULT_Q 720 | |||||
| #define CGEMM_DEFAULT_Q 640 | |||||
| #define ZGEMM_DEFAULT_Q 640 | |||||
| #define SGEMM_DEFAULT_P 1280UL | |||||
| #define DGEMM_DEFAULT_P 640UL | |||||
| #define CGEMM_DEFAULT_P 640UL | |||||
| #define ZGEMM_DEFAULT_P 320UL | |||||
| #define SGEMM_DEFAULT_Q 640UL | |||||
| #define DGEMM_DEFAULT_Q 720UL | |||||
| #define CGEMM_DEFAULT_Q 640UL | |||||
| #define ZGEMM_DEFAULT_Q 640UL | |||||
| #if 0 | |||||
| #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P | |||||
| #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P | |||||
| #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P | |||||
| #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P | |||||
| #endif | |||||
| #define SGEMM_DEFAULT_R 4096 | |||||
| #define DGEMM_DEFAULT_R 4096 | |||||
| #define CGEMM_DEFAULT_R 4096 | |||||
| #define ZGEMM_DEFAULT_R 512 | |||||
| #define SYMV_P 8 | #define SYMV_P 8 | ||||