| @@ -80,7 +80,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #undef COMPILE_TLS | #undef COMPILE_TLS | ||||
| #endif | #endif | ||||
| #if defined(__GLIBC_PREREQ) | |||||
| #if defined(__GLIBC_PREREQ) | |||||
| #if !__GLIBC_PREREQ(2,20) | #if !__GLIBC_PREREQ(2,20) | ||||
| #undef COMPILE_TLS | #undef COMPILE_TLS | ||||
| #endif | #endif | ||||
| @@ -161,7 +161,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__)) | #if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__)) | ||||
| #include <conio.h> | #include <conio.h> | ||||
| #undef printf | #undef printf | ||||
| #define printf _cprintf | |||||
| #define printf _cprintf | |||||
| #endif | #endif | ||||
| #ifdef OS_LINUX | #ifdef OS_LINUX | ||||
| @@ -190,14 +190,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define CONSTRUCTOR __cdecl | #define CONSTRUCTOR __cdecl | ||||
| #define DESTRUCTOR __cdecl | #define DESTRUCTOR __cdecl | ||||
| #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) | #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) | ||||
| #define CONSTRUCTOR __attribute__ ((constructor)) | |||||
| #define DESTRUCTOR __attribute__ ((destructor)) | |||||
| #define CONSTRUCTOR __attribute__ ((constructor)) | |||||
| #define DESTRUCTOR __attribute__ ((destructor)) | |||||
| #elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900)) | #elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900)) | ||||
| #define CONSTRUCTOR __attribute__ ((constructor(101))) | |||||
| #define DESTRUCTOR __attribute__ ((destructor(101))) | |||||
| #define CONSTRUCTOR __attribute__ ((constructor(101))) | |||||
| #define DESTRUCTOR __attribute__ ((destructor(101))) | |||||
| #else | #else | ||||
| #define CONSTRUCTOR __attribute__ ((constructor)) | |||||
| #define DESTRUCTOR __attribute__ ((destructor)) | |||||
| #define CONSTRUCTOR __attribute__ ((constructor)) | |||||
| #define DESTRUCTOR __attribute__ ((destructor)) | |||||
| #endif | #endif | ||||
| #ifdef DYNAMIC_ARCH | #ifdef DYNAMIC_ARCH | ||||
| @@ -272,7 +272,7 @@ int get_num_procs(void) { | |||||
| return nums; | return nums; | ||||
| } | } | ||||
| ret = CPU_COUNT_S(size,cpusetp); | ret = CPU_COUNT_S(size,cpusetp); | ||||
| if (ret > 0 && ret < nums) nums = ret; | |||||
| if (ret > 0 && ret < nums) nums = ret; | |||||
| CPU_FREE(cpusetp); | CPU_FREE(cpusetp); | ||||
| return nums; | return nums; | ||||
| } else { | } else { | ||||
| @@ -281,7 +281,7 @@ int get_num_procs(void) { | |||||
| return nums; | return nums; | ||||
| } | } | ||||
| ret = CPU_COUNT(&cpuset); | ret = CPU_COUNT(&cpuset); | ||||
| if (ret > 0 && ret < nums) nums = ret; | |||||
| if (ret > 0 && ret < nums) nums = ret; | |||||
| return nums; | return nums; | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -628,12 +628,12 @@ static void *alloc_mmap(void *address){ | |||||
| if (address){ | if (address){ | ||||
| map_address = mmap(address, | map_address = mmap(address, | ||||
| allocation_block_size, | |||||
| MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0); | |||||
| allocation_block_size, | |||||
| MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0); | |||||
| } else { | } else { | ||||
| map_address = mmap(address, | map_address = mmap(address, | ||||
| allocation_block_size, | |||||
| MMAP_ACCESS, MMAP_POLICY, -1, 0); | |||||
| allocation_block_size, | |||||
| MMAP_ACCESS, MMAP_POLICY, -1, 0); | |||||
| } | } | ||||
| STORE_RELEASE_FUNC(map_address, alloc_mmap_free); | STORE_RELEASE_FUNC(map_address, alloc_mmap_free); | ||||
| @@ -648,7 +648,7 @@ static void *alloc_mmap(void *address){ | |||||
| #else | #else | ||||
| #define BENCH_ITERATION 4 | #define BENCH_ITERATION 4 | ||||
| #define SCALING 2 | |||||
| #define SCALING 2 | |||||
| static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) { | static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) { | ||||
| @@ -711,60 +711,60 @@ static void *alloc_mmap(void *address){ | |||||
| #endif | #endif | ||||
| map_address = mmap(NULL, allocation_block_size * SCALING, | map_address = mmap(NULL, allocation_block_size * SCALING, | ||||
| MMAP_ACCESS, MMAP_POLICY, -1, 0); | |||||
| MMAP_ACCESS, MMAP_POLICY, -1, 0); | |||||
| if (map_address != (void *)-1) { | if (map_address != (void *)-1) { | ||||
| #ifdef OS_LINUX | #ifdef OS_LINUX | ||||
| #ifdef DEBUG | #ifdef DEBUG | ||||
| int ret=0; | |||||
| ret=my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0); | |||||
| if(ret==-1){ | |||||
| int errsv=errno; | |||||
| perror("OpenBLAS alloc_mmap:"); | |||||
| printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); | |||||
| } | |||||
| int ret=0; | |||||
| ret=my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0); | |||||
| if(ret==-1){ | |||||
| int errsv=errno; | |||||
| perror("OpenBLAS alloc_mmap:"); | |||||
| printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); | |||||
| } | |||||
| #else | #else | ||||
| my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0); | |||||
| my_mbind(map_address, allocation_block_size * SCALING, MPOL_PREFERRED, NULL, 0, 0); | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| allocsize = DGEMM_P * DGEMM_Q * sizeof(double); | |||||
| allocsize = DGEMM_P * DGEMM_Q * sizeof(double); | |||||
| start = (BLASULONG)map_address; | |||||
| current = (SCALING - 1) * allocation_block_size; | |||||
| original = current; | |||||
| start = (BLASULONG)map_address; | |||||
| current = (SCALING - 1) * allocation_block_size; | |||||
| original = current; | |||||
| while(current > 0 && current <= original) { | |||||
| *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; | |||||
| start += PAGESIZE; | |||||
| current -= PAGESIZE; | |||||
| } | |||||
| while(current > 0 && current <= original) { | |||||
| *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; | |||||
| start += PAGESIZE; | |||||
| current -= PAGESIZE; | |||||
| } | |||||
| *(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address; | |||||
| *(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address; | |||||
| start = (BLASULONG)map_address; | |||||
| start = (BLASULONG)map_address; | |||||
| best = (BLASULONG)-1; | |||||
| best_address = map_address; | |||||
| best = (BLASULONG)-1; | |||||
| best_address = map_address; | |||||
| while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * allocation_block_size)) { | |||||
| while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * allocation_block_size)) { | |||||
| current = run_bench(start, allocsize); | |||||
| current = run_bench(start, allocsize); | |||||
| if (best > current) { | |||||
| best = current; | |||||
| best_address = (void *)start; | |||||
| } | |||||
| if (best > current) { | |||||
| best = current; | |||||
| best_address = (void *)start; | |||||
| } | |||||
| start += PAGESIZE; | |||||
| start += PAGESIZE; | |||||
| } | |||||
| } | |||||
| if ((BLASULONG)best_address > (BLASULONG)map_address) | if ((BLASULONG)best_address > (BLASULONG)map_address) | ||||
| munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address); | |||||
| munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address); | |||||
| munmap((void *)((BLASULONG)best_address + allocation_block_size), (SCALING - 1) * allocation_block_size + (BLASULONG)map_address - (BLASULONG)best_address); | munmap((void *)((BLASULONG)best_address + allocation_block_size), (SCALING - 1) * allocation_block_size + (BLASULONG)map_address - (BLASULONG)best_address); | ||||
| @@ -854,9 +854,9 @@ static void *alloc_windows(void *address){ | |||||
| void *map_address; | void *map_address; | ||||
| map_address = VirtualAlloc(address, | map_address = VirtualAlloc(address, | ||||
| allocation_block_size, | |||||
| MEM_RESERVE | MEM_COMMIT, | |||||
| PAGE_READWRITE); | |||||
| allocation_block_size, | |||||
| MEM_RESERVE | MEM_COMMIT, | |||||
| PAGE_READWRITE); | |||||
| if (map_address == (void *)NULL) map_address = (void *)-1; | if (map_address == (void *)NULL) map_address = (void *)-1; | ||||
| @@ -897,9 +897,9 @@ static void *alloc_devicedirver(void *address){ | |||||
| } | } | ||||
| map_address = mmap(address, allocation_block_size, | map_address = mmap(address, allocation_block_size, | ||||
| PROT_READ | PROT_WRITE, | |||||
| MAP_FILE | MAP_SHARED, | |||||
| fd, 0); | |||||
| PROT_READ | PROT_WRITE, | |||||
| MAP_FILE | MAP_SHARED, | |||||
| fd, 0); | |||||
| STORE_RELEASE_FUNC_WITH_ATTR(map_address, alloc_devicedirver_free, fd); | STORE_RELEASE_FUNC_WITH_ATTR(map_address, alloc_devicedirver_free, fd); | ||||
| @@ -974,12 +974,12 @@ static void *alloc_hugetlb(void *address){ | |||||
| shmid = shmget(IPC_PRIVATE, allocation_block_size, | shmid = shmget(IPC_PRIVATE, allocation_block_size, | ||||
| #ifdef OS_LINUX | #ifdef OS_LINUX | ||||
| SHM_HUGETLB | | |||||
| SHM_HUGETLB | | |||||
| #endif | #endif | ||||
| #ifdef OS_AIX | #ifdef OS_AIX | ||||
| SHM_LGPAGE | SHM_PIN | | |||||
| SHM_LGPAGE | SHM_PIN | | |||||
| #endif | #endif | ||||
| IPC_CREAT | SHM_R | SHM_W); | |||||
| IPC_CREAT | SHM_R | SHM_W); | |||||
| if (shmid != -1) { | if (shmid != -1) { | ||||
| map_address = (void *)shmat(shmid, address, SHM_RND); | map_address = (void *)shmat(shmid, address, SHM_RND); | ||||
| @@ -1026,9 +1026,9 @@ static void *alloc_hugetlb(void *address){ | |||||
| } | } | ||||
| map_address = (void *)VirtualAlloc(address, | map_address = (void *)VirtualAlloc(address, | ||||
| allocation_block_size, | |||||
| MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT, | |||||
| PAGE_READWRITE); | |||||
| allocation_block_size, | |||||
| MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT, | |||||
| PAGE_READWRITE); | |||||
| tp.Privileges[0].Attributes = 0; | tp.Privileges[0].Attributes = 0; | ||||
| AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL); | AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL); | ||||
| @@ -1078,9 +1078,9 @@ static void *alloc_hugetlbfile(void *address){ | |||||
| unlink(filename); | unlink(filename); | ||||
| map_address = mmap(address, allocation_block_size, | map_address = mmap(address, allocation_block_size, | ||||
| PROT_READ | PROT_WRITE, | |||||
| MAP_SHARED, | |||||
| fd, 0); | |||||
| PROT_READ | PROT_WRITE, | |||||
| MAP_SHARED, | |||||
| fd, 0); | |||||
| STORE_RELEASE_FUNC_WITH_ATTR(map_address, alloc_hugetlbfile_free, fd); | STORE_RELEASE_FUNC_WITH_ATTR(map_address, alloc_hugetlbfile_free, fd); | ||||
| @@ -1107,7 +1107,7 @@ static volatile int memory_initialized = 0; | |||||
| /* 1 : Level 2 functions */ | /* 1 : Level 2 functions */ | ||||
| /* 2 : Thread */ | /* 2 : Thread */ | ||||
| static void blas_memory_cleanup(void* ptr){ | |||||
| static void blas_memory_cleanup(void* ptr){ | |||||
| if (ptr) { | if (ptr) { | ||||
| struct alloc_t ** table = (struct alloc_t **)ptr; | struct alloc_t ** table = (struct alloc_t **)ptr; | ||||
| int pos; | int pos; | ||||
| @@ -1243,27 +1243,27 @@ UNLOCK_COMMAND(&alloc_lock); | |||||
| while ((func != NULL) && (map_address == (void *) -1)) { | while ((func != NULL) && (map_address == (void *) -1)) { | ||||
| map_address = (*func)((void *)base_address); | |||||
| map_address = (*func)((void *)base_address); | |||||
| #ifdef ALLOC_DEVICEDRIVER | #ifdef ALLOC_DEVICEDRIVER | ||||
| if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { | |||||
| fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation failed.\n"); | |||||
| } | |||||
| if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { | |||||
| fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation failed.\n"); | |||||
| } | |||||
| #endif | #endif | ||||
| #ifdef ALLOC_HUGETLBFILE | #ifdef ALLOC_HUGETLBFILE | ||||
| if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) { | |||||
| if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) { | |||||
| #ifndef OS_WINDOWS | #ifndef OS_WINDOWS | ||||
| fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation failed.\n"); | |||||
| fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation failed.\n"); | |||||
| #endif | #endif | ||||
| } | |||||
| } | |||||
| #endif | #endif | ||||
| #if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) | #if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) | ||||
| if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; | |||||
| if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; | |||||
| #endif | #endif | ||||
| func ++; | |||||
| func ++; | |||||
| } | } | ||||
| #ifdef DEBUG | #ifdef DEBUG | ||||
| @@ -1377,7 +1377,7 @@ static BLASULONG init_lock = 0UL; | |||||
| #endif | #endif | ||||
| static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, | static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, | ||||
| void *sa, void *sb, BLASLONG pos) { | |||||
| void *sa, void *sb, BLASLONG pos) { | |||||
| #if !defined(ARCH_POWER) && !defined(ARCH_SPARC) | #if !defined(ARCH_POWER) && !defined(ARCH_SPARC) | ||||
| @@ -1507,11 +1507,11 @@ void CONSTRUCTOR gotoblas_init(void) { | |||||
| struct rlimit curlimit; | struct rlimit curlimit; | ||||
| if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 ) | if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 ) | ||||
| { | { | ||||
| if ( curlimit.rlim_cur != curlimit.rlim_max ) | |||||
| { | |||||
| curlimit.rlim_cur = curlimit.rlim_max; | |||||
| setrlimit(RLIMIT_STACK, &curlimit); | |||||
| } | |||||
| if ( curlimit.rlim_cur != curlimit.rlim_max ) | |||||
| { | |||||
| curlimit.rlim_cur = curlimit.rlim_max; | |||||
| setrlimit(RLIMIT_STACK, &curlimit); | |||||
| } | |||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -1545,7 +1545,7 @@ void DESTRUCTOR gotoblas_quit(void) { | |||||
| TlsFree(local_storage_key); | TlsFree(local_storage_key); | ||||
| #else | #else | ||||
| pthread_key_delete(local_storage_key); | pthread_key_delete(local_storage_key); | ||||
| #endif | |||||
| #endif | |||||
| #endif | #endif | ||||
| #ifdef PROFILE | #ifdef PROFILE | ||||
| @@ -1605,8 +1605,8 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReser | |||||
| */ | */ | ||||
| static int on_process_term(void) | static int on_process_term(void) | ||||
| { | { | ||||
| gotoblas_quit(); | |||||
| return 0; | |||||
| gotoblas_quit(); | |||||
| return 0; | |||||
| } | } | ||||
| #ifdef _WIN64 | #ifdef _WIN64 | ||||
| #pragma comment(linker, "/INCLUDE:_tls_used") | #pragma comment(linker, "/INCLUDE:_tls_used") | ||||
| @@ -1705,7 +1705,7 @@ void gotoblas_dummy_for_PGI(void) { | |||||
| #if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__)) | #if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__)) | ||||
| #include <conio.h> | #include <conio.h> | ||||
| #undef printf | #undef printf | ||||
| #define printf _cprintf | |||||
| #define printf _cprintf | |||||
| #endif | #endif | ||||
| #ifdef OS_LINUX | #ifdef OS_LINUX | ||||
| @@ -1734,14 +1734,14 @@ void gotoblas_dummy_for_PGI(void) { | |||||
| #define CONSTRUCTOR __cdecl | #define CONSTRUCTOR __cdecl | ||||
| #define DESTRUCTOR __cdecl | #define DESTRUCTOR __cdecl | ||||
| #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) | #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) | ||||
| #define CONSTRUCTOR __attribute__ ((constructor)) | |||||
| #define DESTRUCTOR __attribute__ ((destructor)) | |||||
| #define CONSTRUCTOR __attribute__ ((constructor)) | |||||
| #define DESTRUCTOR __attribute__ ((destructor)) | |||||
| #elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900)) | #elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900)) | ||||
| #define CONSTRUCTOR __attribute__ ((constructor(101))) | |||||
| #define DESTRUCTOR __attribute__ ((destructor(101))) | |||||
| #define CONSTRUCTOR __attribute__ ((constructor(101))) | |||||
| #define DESTRUCTOR __attribute__ ((destructor(101))) | |||||
| #else | #else | ||||
| #define CONSTRUCTOR __attribute__ ((constructor)) | |||||
| #define DESTRUCTOR __attribute__ ((destructor)) | |||||
| #define CONSTRUCTOR __attribute__ ((constructor)) | |||||
| #define DESTRUCTOR __attribute__ ((destructor)) | |||||
| #endif | #endif | ||||
| #ifdef DYNAMIC_ARCH | #ifdef DYNAMIC_ARCH | ||||
| @@ -1817,7 +1817,7 @@ int get_num_procs(void) { | |||||
| return nums; | return nums; | ||||
| } | } | ||||
| ret = CPU_COUNT_S(size,cpusetp); | ret = CPU_COUNT_S(size,cpusetp); | ||||
| if (ret > 0 && ret < nums) nums = ret; | |||||
| if (ret > 0 && ret < nums) nums = ret; | |||||
| CPU_FREE(cpusetp); | CPU_FREE(cpusetp); | ||||
| return nums; | return nums; | ||||
| } else { | } else { | ||||
| @@ -1826,7 +1826,7 @@ int get_num_procs(void) { | |||||
| return nums; | return nums; | ||||
| } | } | ||||
| ret = CPU_COUNT(&cpuset); | ret = CPU_COUNT(&cpuset); | ||||
| if (ret > 0 && ret < nums) nums = ret; | |||||
| if (ret > 0 && ret < nums) nums = ret; | |||||
| return nums; | return nums; | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -2083,26 +2083,26 @@ static void *alloc_mmap(void *address){ | |||||
| if (address){ | if (address){ | ||||
| map_address = mmap(address, | map_address = mmap(address, | ||||
| BUFFER_SIZE, | |||||
| MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0); | |||||
| BUFFER_SIZE, | |||||
| MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0); | |||||
| } else { | } else { | ||||
| map_address = mmap(address, | map_address = mmap(address, | ||||
| BUFFER_SIZE, | |||||
| MMAP_ACCESS, MMAP_POLICY, -1, 0); | |||||
| BUFFER_SIZE, | |||||
| MMAP_ACCESS, MMAP_POLICY, -1, 0); | |||||
| } | } | ||||
| if (map_address != (void *)-1) { | if (map_address != (void *)-1) { | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | ||||
| LOCK_COMMAND(&alloc_lock); | LOCK_COMMAND(&alloc_lock); | ||||
| #endif | |||||
| #endif | |||||
| release_info[release_pos].address = map_address; | release_info[release_pos].address = map_address; | ||||
| release_info[release_pos].func = alloc_mmap_free; | release_info[release_pos].func = alloc_mmap_free; | ||||
| release_pos ++; | release_pos ++; | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | ||||
| UNLOCK_COMMAND(&alloc_lock); | UNLOCK_COMMAND(&alloc_lock); | ||||
| #endif | |||||
| #endif | |||||
| } else { | } else { | ||||
| #ifdef DEBUG | |||||
| #ifdef DEBUG | |||||
| int errsv=errno; | int errsv=errno; | ||||
| perror("OpenBLAS : mmap failed:"); | perror("OpenBLAS : mmap failed:"); | ||||
| printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); | printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); | ||||
| @@ -2119,7 +2119,7 @@ static void *alloc_mmap(void *address){ | |||||
| #else | #else | ||||
| #define BENCH_ITERATION 4 | #define BENCH_ITERATION 4 | ||||
| #define SCALING 2 | |||||
| #define SCALING 2 | |||||
| static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) { | static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) { | ||||
| @@ -2182,59 +2182,59 @@ static void *alloc_mmap(void *address){ | |||||
| #endif | #endif | ||||
| map_address = mmap(NULL, BUFFER_SIZE * SCALING, | map_address = mmap(NULL, BUFFER_SIZE * SCALING, | ||||
| MMAP_ACCESS, MMAP_POLICY, -1, 0); | |||||
| MMAP_ACCESS, MMAP_POLICY, -1, 0); | |||||
| if (map_address != (void *)-1) { | if (map_address != (void *)-1) { | ||||
| #ifdef OS_LINUX | #ifdef OS_LINUX | ||||
| #ifdef DEBUG | #ifdef DEBUG | ||||
| int ret=0; | |||||
| ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); | |||||
| if(ret==-1){ | |||||
| int errsv=errno; | |||||
| perror("OpenBLAS alloc_mmap:"); | |||||
| printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); | |||||
| } | |||||
| int ret=0; | |||||
| ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); | |||||
| if(ret==-1){ | |||||
| int errsv=errno; | |||||
| perror("OpenBLAS alloc_mmap:"); | |||||
| printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); | |||||
| } | |||||
| #else | #else | ||||
| my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); | |||||
| my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0); | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| allocsize = DGEMM_P * DGEMM_Q * sizeof(double); | |||||
| allocsize = DGEMM_P * DGEMM_Q * sizeof(double); | |||||
| start = (BLASULONG)map_address; | |||||
| current = (SCALING - 1) * BUFFER_SIZE; | |||||
| start = (BLASULONG)map_address; | |||||
| current = (SCALING - 1) * BUFFER_SIZE; | |||||
| while(current > 0) { | |||||
| *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; | |||||
| start += PAGESIZE; | |||||
| current -= PAGESIZE; | |||||
| } | |||||
| while(current > 0) { | |||||
| *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; | |||||
| start += PAGESIZE; | |||||
| current -= PAGESIZE; | |||||
| } | |||||
| *(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address; | |||||
| *(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address; | |||||
| start = (BLASULONG)map_address; | |||||
| start = (BLASULONG)map_address; | |||||
| best = (BLASULONG)-1; | |||||
| best_address = map_address; | |||||
| best = (BLASULONG)-1; | |||||
| best_address = map_address; | |||||
| while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * BUFFER_SIZE)) { | |||||
| while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * BUFFER_SIZE)) { | |||||
| current = run_bench(start, allocsize); | |||||
| current = run_bench(start, allocsize); | |||||
| if (best > current) { | |||||
| best = current; | |||||
| best_address = (void *)start; | |||||
| } | |||||
| if (best > current) { | |||||
| best = current; | |||||
| best_address = (void *)start; | |||||
| } | |||||
| start += PAGESIZE; | |||||
| start += PAGESIZE; | |||||
| } | |||||
| } | |||||
| if ((BLASULONG)best_address > (BLASULONG)map_address) | if ((BLASULONG)best_address > (BLASULONG)map_address) | ||||
| munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address); | |||||
| munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address); | |||||
| munmap((void *)((BLASULONG)best_address + BUFFER_SIZE), (SCALING - 1) * BUFFER_SIZE + (BLASULONG)map_address - (BLASULONG)best_address); | munmap((void *)((BLASULONG)best_address + BUFFER_SIZE), (SCALING - 1) * BUFFER_SIZE + (BLASULONG)map_address - (BLASULONG)best_address); | ||||
| @@ -2342,9 +2342,9 @@ static void *alloc_windows(void *address){ | |||||
| void *map_address; | void *map_address; | ||||
| map_address = VirtualAlloc(address, | map_address = VirtualAlloc(address, | ||||
| BUFFER_SIZE, | |||||
| MEM_RESERVE | MEM_COMMIT, | |||||
| PAGE_READWRITE); | |||||
| BUFFER_SIZE, | |||||
| MEM_RESERVE | MEM_COMMIT, | |||||
| PAGE_READWRITE); | |||||
| if (map_address == (void *)NULL) map_address = (void *)-1; | if (map_address == (void *)NULL) map_address = (void *)-1; | ||||
| @@ -2388,9 +2388,9 @@ static void *alloc_devicedirver(void *address){ | |||||
| } | } | ||||
| map_address = mmap(address, BUFFER_SIZE, | map_address = mmap(address, BUFFER_SIZE, | ||||
| PROT_READ | PROT_WRITE, | |||||
| MAP_FILE | MAP_SHARED, | |||||
| fd, 0); | |||||
| PROT_READ | PROT_WRITE, | |||||
| MAP_FILE | MAP_SHARED, | |||||
| fd, 0); | |||||
| if (map_address != (void *)-1) { | if (map_address != (void *)-1) { | ||||
| release_info[release_pos].address = map_address; | release_info[release_pos].address = map_address; | ||||
| @@ -2471,12 +2471,12 @@ static void *alloc_hugetlb(void *address){ | |||||
| shmid = shmget(IPC_PRIVATE, BUFFER_SIZE, | shmid = shmget(IPC_PRIVATE, BUFFER_SIZE, | ||||
| #ifdef OS_LINUX | #ifdef OS_LINUX | ||||
| SHM_HUGETLB | | |||||
| SHM_HUGETLB | | |||||
| #endif | #endif | ||||
| #ifdef OS_AIX | #ifdef OS_AIX | ||||
| SHM_LGPAGE | SHM_PIN | | |||||
| SHM_LGPAGE | SHM_PIN | | |||||
| #endif | #endif | ||||
| IPC_CREAT | SHM_R | SHM_W); | |||||
| IPC_CREAT | SHM_R | SHM_W); | |||||
| if (shmid != -1) { | if (shmid != -1) { | ||||
| map_address = (void *)shmat(shmid, address, SHM_RND); | map_address = (void *)shmat(shmid, address, SHM_RND); | ||||
| @@ -2511,7 +2511,7 @@ static void *alloc_hugetlb(void *address){ | |||||
| tp.PrivilegeCount = 1; | tp.PrivilegeCount = 1; | ||||
| tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; | tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; | ||||
| if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) { | if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) { | ||||
| CloseHandle(hToken); | CloseHandle(hToken); | ||||
| return (void*)-1; | return (void*)-1; | ||||
| @@ -2523,9 +2523,9 @@ static void *alloc_hugetlb(void *address){ | |||||
| } | } | ||||
| map_address = (void *)VirtualAlloc(address, | map_address = (void *)VirtualAlloc(address, | ||||
| BUFFER_SIZE, | |||||
| MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT, | |||||
| PAGE_READWRITE); | |||||
| BUFFER_SIZE, | |||||
| MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT, | |||||
| PAGE_READWRITE); | |||||
| tp.Privileges[0].Attributes = 0; | tp.Privileges[0].Attributes = 0; | ||||
| AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL); | AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL); | ||||
| @@ -2578,9 +2578,9 @@ static void *alloc_hugetlbfile(void *address){ | |||||
| unlink(filename); | unlink(filename); | ||||
| map_address = mmap(address, BUFFER_SIZE, | map_address = mmap(address, BUFFER_SIZE, | ||||
| PROT_READ | PROT_WRITE, | |||||
| MAP_SHARED, | |||||
| fd, 0); | |||||
| PROT_READ | PROT_WRITE, | |||||
| MAP_SHARED, | |||||
| fd, 0); | |||||
| if (map_address != (void *)-1) { | if (map_address != (void *)-1) { | ||||
| release_info[release_pos].address = map_address; | release_info[release_pos].address = map_address; | ||||
| @@ -2717,7 +2717,7 @@ void *blas_memory_alloc(int procpos){ | |||||
| if (!memory[position].used && (memory[position].pos == mypos)) { | if (!memory[position].used && (memory[position].pos == mypos)) { | ||||
| #if defined(SMP) && !defined(USE_OPENMP) | #if defined(SMP) && !defined(USE_OPENMP) | ||||
| LOCK_COMMAND(&alloc_lock); | LOCK_COMMAND(&alloc_lock); | ||||
| #else | |||||
| #else | |||||
| blas_lock(&memory[position].lock); | blas_lock(&memory[position].lock); | ||||
| #endif | #endif | ||||
| if (!memory[position].used) goto allocation; | if (!memory[position].used) goto allocation; | ||||
| @@ -2725,7 +2725,7 @@ void *blas_memory_alloc(int procpos){ | |||||
| UNLOCK_COMMAND(&alloc_lock); | UNLOCK_COMMAND(&alloc_lock); | ||||
| #else | #else | ||||
| blas_unlock(&memory[position].lock); | blas_unlock(&memory[position].lock); | ||||
| #endif | |||||
| #endif | |||||
| } | } | ||||
| position ++; | position ++; | ||||
| @@ -2741,22 +2741,22 @@ void *blas_memory_alloc(int procpos){ | |||||
| LOCK_COMMAND(&alloc_lock); | LOCK_COMMAND(&alloc_lock); | ||||
| #endif | #endif | ||||
| do { | do { | ||||
| RMB; | |||||
| #if defined(USE_OPENMP) | |||||
| if (!memory[position].used) { | |||||
| RMB; | |||||
| #if defined(USE_OPENMP) | |||||
| if (!memory[position].used) { | |||||
| blas_lock(&memory[position].lock); | blas_lock(&memory[position].lock); | ||||
| #endif | #endif | ||||
| if (!memory[position].used) goto allocation; | if (!memory[position].used) goto allocation; | ||||
| #if defined(USE_OPENMP) | #if defined(USE_OPENMP) | ||||
| blas_unlock(&memory[position].lock); | |||||
| blas_unlock(&memory[position].lock); | |||||
| } | } | ||||
| #endif | #endif | ||||
| position ++; | position ++; | ||||
| } while (position < NUM_BUFFERS); | } while (position < NUM_BUFFERS); | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | ||||
| UNLOCK_COMMAND(&alloc_lock); | |||||
| UNLOCK_COMMAND(&alloc_lock); | |||||
| #endif | #endif | ||||
| goto error; | goto error; | ||||
| @@ -2770,7 +2770,7 @@ void *blas_memory_alloc(int procpos){ | |||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | ||||
| UNLOCK_COMMAND(&alloc_lock); | UNLOCK_COMMAND(&alloc_lock); | ||||
| #else | #else | ||||
| blas_unlock(&memory[position].lock); | |||||
| blas_unlock(&memory[position].lock); | |||||
| #endif | #endif | ||||
| if (!memory[position].addr) { | if (!memory[position].addr) { | ||||
| do { | do { | ||||
| @@ -2784,27 +2784,27 @@ void *blas_memory_alloc(int procpos){ | |||||
| while ((func != NULL) && (map_address == (void *) -1)) { | while ((func != NULL) && (map_address == (void *) -1)) { | ||||
| map_address = (*func)((void *)base_address); | |||||
| map_address = (*func)((void *)base_address); | |||||
| #ifdef ALLOC_DEVICEDRIVER | #ifdef ALLOC_DEVICEDRIVER | ||||
| if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { | |||||
| fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n"); | |||||
| } | |||||
| if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { | |||||
| fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n"); | |||||
| } | |||||
| #endif | #endif | ||||
| #ifdef ALLOC_HUGETLBFILE | #ifdef ALLOC_HUGETLBFILE | ||||
| if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) { | |||||
| if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) { | |||||
| #ifndef OS_WINDOWS | #ifndef OS_WINDOWS | ||||
| fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n"); | |||||
| fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n"); | |||||
| #endif | #endif | ||||
| } | |||||
| } | |||||
| #endif | #endif | ||||
| #if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) | #if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) | ||||
| if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; | |||||
| if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; | |||||
| #endif | #endif | ||||
| func ++; | |||||
| func ++; | |||||
| } | } | ||||
| #ifdef DEBUG | #ifdef DEBUG | ||||
| @@ -2818,7 +2818,7 @@ void *blas_memory_alloc(int procpos){ | |||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | ||||
| LOCK_COMMAND(&alloc_lock); | LOCK_COMMAND(&alloc_lock); | ||||
| #endif | |||||
| #endif | |||||
| memory[position].addr = map_address; | memory[position].addr = map_address; | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | ||||
| UNLOCK_COMMAND(&alloc_lock); | UNLOCK_COMMAND(&alloc_lock); | ||||
| @@ -2856,7 +2856,7 @@ void *blas_memory_alloc(int procpos){ | |||||
| #ifdef DEBUG | #ifdef DEBUG | ||||
| printf("Mapped : %p %3d\n\n", | printf("Mapped : %p %3d\n\n", | ||||
| (void *)memory[position].addr, position); | |||||
| (void *)memory[position].addr, position); | |||||
| #endif | #endif | ||||
| return (void *)memory[position].addr; | return (void *)memory[position].addr; | ||||
| @@ -2972,7 +2972,7 @@ static BLASULONG init_lock = 0UL; | |||||
| #endif | #endif | ||||
| static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, | static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, | ||||
| void *sa, void *sb, BLASLONG pos) { | |||||
| void *sa, void *sb, BLASLONG pos) { | |||||
| #if !defined(ARCH_POWER) && !defined(ARCH_SPARC) | #if !defined(ARCH_POWER) && !defined(ARCH_SPARC) | ||||
| @@ -3099,15 +3099,15 @@ void CONSTRUCTOR gotoblas_init(void) { | |||||
| //#if defined(OS_LINUX) | //#if defined(OS_LINUX) | ||||
| #if 0 | #if 0 | ||||
| struct rlimit curlimit; | |||||
| if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 ) | |||||
| { | |||||
| if ( curlimit.rlim_cur != curlimit.rlim_max ) | |||||
| { | |||||
| curlimit.rlim_cur = curlimit.rlim_max; | |||||
| setrlimit(RLIMIT_STACK, &curlimit); | |||||
| } | |||||
| } | |||||
| struct rlimit curlimit; | |||||
| if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 ) | |||||
| { | |||||
| if ( curlimit.rlim_cur != curlimit.rlim_max ) | |||||
| { | |||||
| curlimit.rlim_cur = curlimit.rlim_max; | |||||
| setrlimit(RLIMIT_STACK, &curlimit); | |||||
| } | |||||
| } | |||||
| #endif | #endif | ||||
| #ifdef SMP | #ifdef SMP | ||||
| @@ -3189,8 +3189,8 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReser | |||||
| */ | */ | ||||
| static int on_process_term(void) | static int on_process_term(void) | ||||
| { | { | ||||
| gotoblas_quit(); | |||||
| return 0; | |||||
| gotoblas_quit(); | |||||
| return 0; | |||||
| } | } | ||||
| #ifdef _WIN64 | #ifdef _WIN64 | ||||
| #pragma comment(linker, "/INCLUDE:_tls_used") | #pragma comment(linker, "/INCLUDE:_tls_used") | ||||
| @@ -3237,7 +3237,7 @@ void gotoblas_dummy_for_PGI(void) { | |||||
| asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text"); | asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text"); | ||||
| asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text"); | asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text"); | ||||
| #endif | #endif | ||||
| #endif | |||||
| #endif | |||||
| } | } | ||||
| #endif | #endif | ||||