| @@ -87,28 +87,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| /* Memory buffer must fit two matrix subblocks of maximal size */ | |||||
| #if BUFFER_SIZE < (SGEMM_DEFAULT_P * SGEMM_DEFAULT_Q * 4 * 2) || \ | |||||
| BUFFER_SIZE < (SGEMM_DEFAULT_P * SGEMM_DEFAULT_R * 4 * 2) || \ | |||||
| BUFFER_SIZE < (SGEMM_DEFAULT_R * SGEMM_DEFAULT_Q * 4 * 2) | |||||
| #error BUFFER_SIZE is too small for P, Q, and R of SGEMM | |||||
| #endif | |||||
| #if BUFFER_SIZE < (DGEMM_DEFAULT_P * DGEMM_DEFAULT_Q * 8 * 2) || \ | |||||
| BUFFER_SIZE < (DGEMM_DEFAULT_P * DGEMM_DEFAULT_R * 8 * 2) || \ | |||||
| BUFFER_SIZE < (DGEMM_DEFAULT_R * DGEMM_DEFAULT_Q * 8 * 2) | |||||
| #error BUFFER_SIZE is too small for P, Q, and R of DGEMM | |||||
| #endif | |||||
| #if BUFFER_SIZE < (CGEMM_DEFAULT_P * CGEMM_DEFAULT_Q * 8 * 2) || \ | |||||
| BUFFER_SIZE < (CGEMM_DEFAULT_P * CGEMM_DEFAULT_R * 8 * 2) || \ | |||||
| BUFFER_SIZE < (CGEMM_DEFAULT_R * CGEMM_DEFAULT_Q * 8 * 2) | |||||
| #error BUFFER_SIZE is too small for P, Q, and R of CGEMM | |||||
| #endif | |||||
| #if BUFFER_SIZE < (ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_Q * 16 * 2) || \ | |||||
| BUFFER_SIZE < (ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_R * 16 * 2) || \ | |||||
| BUFFER_SIZE < (ZGEMM_DEFAULT_R * ZGEMM_DEFAULT_Q * 16 * 2) | |||||
| #error BUFFER_SIZE is too small for P, Q, and R of ZGEMM | |||||
| #endif | |||||
| #if defined(COMPILE_TLS) | #if defined(COMPILE_TLS) | ||||
| #include <errno.h> | #include <errno.h> | ||||
| @@ -151,7 +129,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include <unistd.h> | #include <unistd.h> | ||||
| #endif | #endif | ||||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) | |||||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) | |||||
| #include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
| #include <sys/resource.h> | #include <sys/resource.h> | ||||
| #endif | #endif | ||||
| @@ -214,74 +192,51 @@ void goto_set_num_threads(int num_threads) {}; | |||||
| #else | #else | ||||
| #if defined(OS_LINUX) || defined(OS_SUNOS) | |||||
| #if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD) | |||||
| #ifndef NO_AFFINITY | #ifndef NO_AFFINITY | ||||
| int get_num_procs(void); | int get_num_procs(void); | ||||
| #else | #else | ||||
| int get_num_procs(void) { | int get_num_procs(void) { | ||||
| static int nums = 0; | static int nums = 0; | ||||
| cpu_set_t cpuset,*cpusetp; | |||||
| size_t size; | |||||
| int ret; | |||||
| #if defined(__GLIBC_PREREQ) | |||||
| #if !__GLIBC_PREREQ(2, 7) | |||||
| int i; | |||||
| #if !__GLIBC_PREREQ(2, 6) | |||||
| int n; | |||||
| #endif | |||||
| #endif | |||||
| #endif | |||||
| cpu_set_t *cpusetp; | |||||
| size_t size; | |||||
| int ret; | |||||
| int i,n; | |||||
| if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); | if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); | ||||
| #if !defined(OS_LINUX) | #if !defined(OS_LINUX) | ||||
| return nums; | |||||
| return nums; | |||||
| #endif | #endif | ||||
| #if !defined(__GLIBC_PREREQ) | #if !defined(__GLIBC_PREREQ) | ||||
| return nums; | |||||
| return nums; | |||||
| #else | #else | ||||
| #if !__GLIBC_PREREQ(2, 3) | #if !__GLIBC_PREREQ(2, 3) | ||||
| return nums; | |||||
| return nums; | |||||
| #endif | #endif | ||||
| #if !__GLIBC_PREREQ(2, 7) | #if !__GLIBC_PREREQ(2, 7) | ||||
| ret = sched_getaffinity(0,sizeof(cpuset), &cpuset); | |||||
| ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); | |||||
| if (ret!=0) return nums; | if (ret!=0) return nums; | ||||
| n=0; | n=0; | ||||
| #if !__GLIBC_PREREQ(2, 6) | #if !__GLIBC_PREREQ(2, 6) | ||||
| for (i=0;i<nums;i++) | for (i=0;i<nums;i++) | ||||
| if (CPU_ISSET(i,&cpuset)) n++; | |||||
| if (CPU_ISSET(i,cpusetp)) n++; | |||||
| nums=n; | nums=n; | ||||
| #else | #else | ||||
| nums = CPU_COUNT(sizeof(cpuset),&cpuset); | |||||
| nums = CPU_COUNT(sizeof(cpu_set_t),cpusetp); | |||||
| #endif | #endif | ||||
| return nums; | return nums; | ||||
| #else | #else | ||||
| if (nums >= CPU_SETSIZE) { | |||||
| cpusetp = CPU_ALLOC(nums); | |||||
| if (cpusetp == NULL) { | |||||
| return nums; | |||||
| } | |||||
| size = CPU_ALLOC_SIZE(nums); | |||||
| ret = sched_getaffinity(0,size,cpusetp); | |||||
| if (ret!=0) { | |||||
| CPU_FREE(cpusetp); | |||||
| return nums; | |||||
| } | |||||
| ret = CPU_COUNT_S(size,cpusetp); | |||||
| if (ret > 0 && ret < nums) nums = ret; | |||||
| CPU_FREE(cpusetp); | |||||
| return nums; | |||||
| } else { | |||||
| ret = sched_getaffinity(0,sizeof(cpuset),&cpuset); | |||||
| if (ret!=0) { | |||||
| return nums; | |||||
| } | |||||
| ret = CPU_COUNT(&cpuset); | |||||
| if (ret > 0 && ret < nums) nums = ret; | |||||
| return nums; | |||||
| } | |||||
| cpusetp = CPU_ALLOC(nums); | |||||
| if (cpusetp == NULL) return nums; | |||||
| size = CPU_ALLOC_SIZE(nums); | |||||
| ret = sched_getaffinity(0,size,cpusetp); | |||||
| if (ret!=0) return nums; | |||||
| ret = CPU_COUNT_S(size,cpusetp); | |||||
| if (ret > 0 && ret < nums) nums = ret; | |||||
| CPU_FREE(cpusetp); | |||||
| return nums; | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -334,7 +289,7 @@ int get_num_procs(void) { | |||||
| #endif | #endif | ||||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) | |||||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) | |||||
| int get_num_procs(void) { | int get_num_procs(void) { | ||||
| @@ -426,7 +381,7 @@ extern int openblas_goto_num_threads_env(); | |||||
| extern int openblas_omp_num_threads_env(); | extern int openblas_omp_num_threads_env(); | ||||
| int blas_get_cpu_number(void){ | int blas_get_cpu_number(void){ | ||||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||||
| int max_num; | int max_num; | ||||
| #endif | #endif | ||||
| int blas_goto_num = 0; | int blas_goto_num = 0; | ||||
| @@ -434,7 +389,7 @@ int blas_get_cpu_number(void){ | |||||
| if (blas_num_threads) return blas_num_threads; | if (blas_num_threads) return blas_num_threads; | ||||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||||
| max_num = get_num_procs(); | max_num = get_num_procs(); | ||||
| #endif | #endif | ||||
| @@ -458,7 +413,7 @@ int blas_get_cpu_number(void){ | |||||
| else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; | else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; | ||||
| else blas_num_threads = MAX_CPU_NUMBER; | else blas_num_threads = MAX_CPU_NUMBER; | ||||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||||
| if (blas_num_threads > max_num) blas_num_threads = max_num; | if (blas_num_threads > max_num) blas_num_threads = max_num; | ||||
| #endif | #endif | ||||
| @@ -844,7 +799,7 @@ static void *alloc_qalloc(void *address){ | |||||
| static void alloc_windows_free(struct alloc_t *alloc_info){ | static void alloc_windows_free(struct alloc_t *alloc_info){ | ||||
| VirtualFree(alloc_info, 0, MEM_RELEASE); | |||||
| VirtualFree(alloc_info, allocation_block_size, MEM_DECOMMIT); | |||||
| } | } | ||||
| @@ -957,7 +912,7 @@ static void alloc_hugetlb_free(struct alloc_t *alloc_info){ | |||||
| #ifdef OS_WINDOWS | #ifdef OS_WINDOWS | ||||
| VirtualFree(alloc_info, 0, MEM_LARGE_PAGES | MEM_RELEASE); | |||||
| VirtualFree(alloc_info, allocation_block_size, MEM_LARGE_PAGES | MEM_DECOMMIT); | |||||
| #endif | #endif | ||||
| @@ -1118,6 +1073,11 @@ static volatile int memory_initialized = 0; | |||||
| } | } | ||||
| free(table); | free(table); | ||||
| } | } | ||||
| #if defined(OS_WINDOWS) | |||||
| TlsFree(local_storage_key); | |||||
| #else | |||||
| pthread_key_delete(local_storage_key); | |||||
| #endif | |||||
| } | } | ||||
| static void blas_memory_init(){ | static void blas_memory_init(){ | ||||
| @@ -1335,13 +1295,6 @@ void blas_memory_free_nolock(void * map_address) { | |||||
| free(map_address); | free(map_address); | ||||
| } | } | ||||
| #ifdef SMP | |||||
| void blas_thread_memory_cleanup(void) { | |||||
| blas_memory_cleanup((void*)get_memory_table()); | |||||
| } | |||||
| #endif | |||||
| void blas_shutdown(void){ | void blas_shutdown(void){ | ||||
| #ifdef SMP | #ifdef SMP | ||||
| BLASFUNC(blas_thread_shutdown)(); | BLASFUNC(blas_thread_shutdown)(); | ||||
| @@ -1351,7 +1304,7 @@ void blas_shutdown(void){ | |||||
| /* Only cleanupIf we were built for threading and TLS was initialized */ | /* Only cleanupIf we were built for threading and TLS was initialized */ | ||||
| if (local_storage_key) | if (local_storage_key) | ||||
| #endif | #endif | ||||
| blas_thread_memory_cleanup(); | |||||
| blas_memory_cleanup((void*)get_memory_table()); | |||||
| #ifdef SEEK_ADDRESS | #ifdef SEEK_ADDRESS | ||||
| base_address = 0UL; | base_address = 0UL; | ||||
| @@ -1538,14 +1491,6 @@ void DESTRUCTOR gotoblas_quit(void) { | |||||
| blas_shutdown(); | blas_shutdown(); | ||||
| #if defined(SMP) | |||||
| #if defined(OS_WINDOWS) | |||||
| TlsFree(local_storage_key); | |||||
| #else | |||||
| pthread_key_delete(local_storage_key); | |||||
| #endif | |||||
| #endif | |||||
| #ifdef PROFILE | #ifdef PROFILE | ||||
| moncontrol (0); | moncontrol (0); | ||||
| #endif | #endif | ||||
| @@ -1581,7 +1526,7 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReser | |||||
| break; | break; | ||||
| case DLL_THREAD_DETACH: | case DLL_THREAD_DETACH: | ||||
| #if defined(SMP) | #if defined(SMP) | ||||
| blas_thread_memory_cleanup(); | |||||
| blas_memory_cleanup((void*)get_memory_table()); | |||||
| #endif | #endif | ||||
| break; | break; | ||||
| case DLL_PROCESS_DETACH: | case DLL_PROCESS_DETACH: | ||||
| @@ -1644,7 +1589,6 @@ void gotoblas_dummy_for_PGI(void) { | |||||
| gotoblas_init(); | gotoblas_init(); | ||||
| gotoblas_quit(); | gotoblas_quit(); | ||||
| #if __PGIC__ < 19 | |||||
| #if 0 | #if 0 | ||||
| asm ("\t.section\t.ctors,\"aw\",@progbits; .align 8; .quad gotoblas_init; .section .text"); | asm ("\t.section\t.ctors,\"aw\",@progbits; .align 8; .quad gotoblas_init; .section .text"); | ||||
| asm ("\t.section\t.dtors,\"aw\",@progbits; .align 8; .quad gotoblas_quit; .section .text"); | asm ("\t.section\t.dtors,\"aw\",@progbits; .align 8; .quad gotoblas_quit; .section .text"); | ||||
| @@ -1652,16 +1596,13 @@ void gotoblas_dummy_for_PGI(void) { | |||||
| asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text"); | asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text"); | ||||
| asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text"); | asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text"); | ||||
| #endif | #endif | ||||
| #endif | |||||
| } | } | ||||
| #endif | #endif | ||||
| #else | #else | ||||
| /* USE_TLS / COMPILE_TLS not set */ | |||||
| #include <errno.h> | #include <errno.h> | ||||
| #if defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT) | |||||
| #ifdef OS_WINDOWS | |||||
| #define ALLOC_WINDOWS | #define ALLOC_WINDOWS | ||||
| #ifndef MEM_LARGE_PAGES | #ifndef MEM_LARGE_PAGES | ||||
| #define MEM_LARGE_PAGES 0x20000000 | #define MEM_LARGE_PAGES 0x20000000 | ||||
| @@ -1675,7 +1616,7 @@ void gotoblas_dummy_for_PGI(void) { | |||||
| #include <stdio.h> | #include <stdio.h> | ||||
| #include <fcntl.h> | #include <fcntl.h> | ||||
| #if !defined(OS_WINDOWS) || defined(OS_CYGWIN_NT) | |||||
| #ifndef OS_WINDOWS | |||||
| #include <sys/mman.h> | #include <sys/mman.h> | ||||
| #ifndef NO_SYSV_IPC | #ifndef NO_SYSV_IPC | ||||
| #include <sys/shm.h> | #include <sys/shm.h> | ||||
| @@ -1695,7 +1636,7 @@ void gotoblas_dummy_for_PGI(void) { | |||||
| #include <sys/resource.h> | #include <sys/resource.h> | ||||
| #endif | #endif | ||||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) | |||||
| #if defined(OS_FREEBSD) || defined(OS_DARWIN) | |||||
| #include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
| #include <sys/resource.h> | #include <sys/resource.h> | ||||
| #endif | #endif | ||||
| @@ -1734,12 +1675,9 @@ void gotoblas_dummy_for_PGI(void) { | |||||
| #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) | #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) | ||||
| #define CONSTRUCTOR __attribute__ ((constructor)) | #define CONSTRUCTOR __attribute__ ((constructor)) | ||||
| #define DESTRUCTOR __attribute__ ((destructor)) | #define DESTRUCTOR __attribute__ ((destructor)) | ||||
| #elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900)) | |||||
| #else | |||||
| #define CONSTRUCTOR __attribute__ ((constructor(101))) | #define CONSTRUCTOR __attribute__ ((constructor(101))) | ||||
| #define DESTRUCTOR __attribute__ ((destructor(101))) | #define DESTRUCTOR __attribute__ ((destructor(101))) | ||||
| #else | |||||
| #define CONSTRUCTOR __attribute__ ((constructor)) | |||||
| #define DESTRUCTOR __attribute__ ((destructor)) | |||||
| #endif | #endif | ||||
| #ifdef DYNAMIC_ARCH | #ifdef DYNAMIC_ARCH | ||||
| @@ -1758,75 +1696,50 @@ void goto_set_num_threads(int num_threads) {}; | |||||
| #else | #else | ||||
| #if defined(OS_LINUX) || defined(OS_SUNOS) | |||||
| #if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD) | |||||
| #ifndef NO_AFFINITY | #ifndef NO_AFFINITY | ||||
| int get_num_procs(void); | int get_num_procs(void); | ||||
| #else | #else | ||||
| int get_num_procs(void) { | int get_num_procs(void) { | ||||
| static int nums = 0; | static int nums = 0; | ||||
| cpu_set_t cpuset,*cpusetp; | |||||
| size_t size; | |||||
| int ret; | |||||
| #if defined(__GLIBC_PREREQ) | |||||
| #if !__GLIBC_PREREQ(2, 7) | |||||
| int i; | |||||
| #if !__GLIBC_PREREQ(2, 6) | |||||
| int n; | |||||
| #endif | |||||
| #endif | |||||
| #endif | |||||
| cpu_set_t *cpusetp; | |||||
| size_t size; | |||||
| int ret; | |||||
| int i,n; | |||||
| if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); | if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); | ||||
| #if !defined(OS_LINUX) | #if !defined(OS_LINUX) | ||||
| return nums; | |||||
| return nums; | |||||
| #endif | #endif | ||||
| #if !defined(__GLIBC_PREREQ) | #if !defined(__GLIBC_PREREQ) | ||||
| return nums; | |||||
| return nums; | |||||
| #else | #else | ||||
| #if !__GLIBC_PREREQ(2, 3) | #if !__GLIBC_PREREQ(2, 3) | ||||
| return nums; | |||||
| return nums; | |||||
| #endif | #endif | ||||
| #if !__GLIBC_PREREQ(2, 7) | #if !__GLIBC_PREREQ(2, 7) | ||||
| ret = sched_getaffinity(0,sizeof(cpuset), &cpuset); | |||||
| ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); | |||||
| if (ret!=0) return nums; | if (ret!=0) return nums; | ||||
| n=0; | n=0; | ||||
| #if !__GLIBC_PREREQ(2, 6) | #if !__GLIBC_PREREQ(2, 6) | ||||
| for (i=0;i<nums;i++) | for (i=0;i<nums;i++) | ||||
| if (CPU_ISSET(i,&cpuset)) n++; | |||||
| if (CPU_ISSET(i,cpusetp)) n++; | |||||
| nums=n; | nums=n; | ||||
| #else | #else | ||||
| nums = CPU_COUNT(sizeof(cpuset),&cpuset); | |||||
| nums = CPU_COUNT(sizeof(cpu_set_t),cpusetp); | |||||
| #endif | #endif | ||||
| return nums; | return nums; | ||||
| #else | #else | ||||
| if (nums >= CPU_SETSIZE) { | |||||
| cpusetp = CPU_ALLOC(nums); | |||||
| if (cpusetp == NULL) { | |||||
| return nums; | |||||
| } | |||||
| size = CPU_ALLOC_SIZE(nums); | |||||
| ret = sched_getaffinity(0,size,cpusetp); | |||||
| if (ret!=0) { | |||||
| CPU_FREE(cpusetp); | |||||
| return nums; | |||||
| } | |||||
| ret = CPU_COUNT_S(size,cpusetp); | |||||
| if (ret > 0 && ret < nums) nums = ret; | |||||
| CPU_FREE(cpusetp); | |||||
| return nums; | |||||
| } else { | |||||
| ret = sched_getaffinity(0,sizeof(cpuset),&cpuset); | |||||
| if (ret!=0) { | |||||
| return nums; | |||||
| } | |||||
| ret = CPU_COUNT(&cpuset); | |||||
| if (ret > 0 && ret < nums) nums = ret; | |||||
| return nums; | |||||
| } | |||||
| cpusetp = CPU_ALLOC(nums); | |||||
| if (cpusetp == NULL) return nums; | |||||
| size = CPU_ALLOC_SIZE(nums); | |||||
| ret = sched_getaffinity(0,size,cpusetp); | |||||
| if (ret!=0) return nums; | |||||
| nums = CPU_COUNT_S(size,cpusetp); | |||||
| CPU_FREE(cpusetp); | |||||
| return nums; | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -1840,7 +1753,7 @@ int get_num_procs(void) { | |||||
| return nums; | return nums; | ||||
| } | } | ||||
| #endif | #endif | ||||
| #ifdef OS_HAIKU | #ifdef OS_HAIKU | ||||
| int get_num_procs(void) { | int get_num_procs(void) { | ||||
| static int nums = 0; | static int nums = 0; | ||||
| @@ -1877,7 +1790,7 @@ int get_num_procs(void) { | |||||
| #endif | #endif | ||||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) | |||||
| #if defined(OS_FREEBSD) | |||||
| int get_num_procs(void) { | int get_num_procs(void) { | ||||
| @@ -1954,7 +1867,7 @@ void openblas_fork_handler() | |||||
| // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035 | // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035 | ||||
| // In the mean time build with USE_OPENMP=0 or link against another | // In the mean time build with USE_OPENMP=0 or link against another | ||||
| // implementation of OpenMP. | // implementation of OpenMP. | ||||
| #if !((defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)) || defined(OS_ANDROID)) && defined(SMP_SERVER) | |||||
| #if !(defined(OS_WINDOWS) || defined(OS_ANDROID)) && defined(SMP_SERVER) | |||||
| int err; | int err; | ||||
| err = pthread_atfork ((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, NULL); | err = pthread_atfork ((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, NULL); | ||||
| if(err != 0) | if(err != 0) | ||||
| @@ -1967,7 +1880,7 @@ extern int openblas_goto_num_threads_env(); | |||||
| extern int openblas_omp_num_threads_env(); | extern int openblas_omp_num_threads_env(); | ||||
| int blas_get_cpu_number(void){ | int blas_get_cpu_number(void){ | ||||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||||
| int max_num; | int max_num; | ||||
| #endif | #endif | ||||
| int blas_goto_num = 0; | int blas_goto_num = 0; | ||||
| @@ -1975,11 +1888,11 @@ int blas_get_cpu_number(void){ | |||||
| if (blas_num_threads) return blas_num_threads; | if (blas_num_threads) return blas_num_threads; | ||||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||||
| max_num = get_num_procs(); | max_num = get_num_procs(); | ||||
| #endif | #endif | ||||
| // blas_goto_num = 0; | |||||
| blas_goto_num = 0; | |||||
| #ifndef USE_OPENMP | #ifndef USE_OPENMP | ||||
| blas_goto_num=openblas_num_threads_env(); | blas_goto_num=openblas_num_threads_env(); | ||||
| if (blas_goto_num < 0) blas_goto_num = 0; | if (blas_goto_num < 0) blas_goto_num = 0; | ||||
| @@ -1991,7 +1904,7 @@ int blas_get_cpu_number(void){ | |||||
| #endif | #endif | ||||
| // blas_omp_num = 0; | |||||
| blas_omp_num = 0; | |||||
| blas_omp_num=openblas_omp_num_threads_env(); | blas_omp_num=openblas_omp_num_threads_env(); | ||||
| if (blas_omp_num < 0) blas_omp_num = 0; | if (blas_omp_num < 0) blas_omp_num = 0; | ||||
| @@ -1999,7 +1912,7 @@ int blas_get_cpu_number(void){ | |||||
| else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; | else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; | ||||
| else blas_num_threads = MAX_CPU_NUMBER; | else blas_num_threads = MAX_CPU_NUMBER; | ||||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||||
| if (blas_num_threads > max_num) blas_num_threads = max_num; | if (blas_num_threads > max_num) blas_num_threads = max_num; | ||||
| #endif | #endif | ||||
| @@ -2063,12 +1976,8 @@ static BLASULONG alloc_lock = 0UL; | |||||
| static void alloc_mmap_free(struct release_t *release){ | static void alloc_mmap_free(struct release_t *release){ | ||||
| if (!release->address) return; | |||||
| if (munmap(release -> address, BUFFER_SIZE)) { | if (munmap(release -> address, BUFFER_SIZE)) { | ||||
| int errsv=errno; | |||||
| perror("OpenBLAS : munmap failed:"); | |||||
| printf("error code=%d,\trelease->address=%lx\n",errsv,release->address); | |||||
| printf("OpenBLAS : munmap failed\n"); | |||||
| } | } | ||||
| } | } | ||||
| @@ -2090,21 +1999,11 @@ static void *alloc_mmap(void *address){ | |||||
| } | } | ||||
| if (map_address != (void *)-1) { | if (map_address != (void *)-1) { | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||||
| LOCK_COMMAND(&alloc_lock); | LOCK_COMMAND(&alloc_lock); | ||||
| #endif | |||||
| release_info[release_pos].address = map_address; | release_info[release_pos].address = map_address; | ||||
| release_info[release_pos].func = alloc_mmap_free; | release_info[release_pos].func = alloc_mmap_free; | ||||
| release_pos ++; | release_pos ++; | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||||
| UNLOCK_COMMAND(&alloc_lock); | UNLOCK_COMMAND(&alloc_lock); | ||||
| #endif | |||||
| } else { | |||||
| #ifdef DEBUG | |||||
| int errsv=errno; | |||||
| perror("OpenBLAS : mmap failed:"); | |||||
| printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); | |||||
| #endif | |||||
| } | } | ||||
| #ifdef OS_LINUX | #ifdef OS_LINUX | ||||
| @@ -2246,18 +2145,14 @@ static void *alloc_mmap(void *address){ | |||||
| #if defined(OS_LINUX) && !defined(NO_WARMUP) | #if defined(OS_LINUX) && !defined(NO_WARMUP) | ||||
| } | } | ||||
| #endif | #endif | ||||
| LOCK_COMMAND(&alloc_lock); | |||||
| if (map_address != (void *)-1) { | if (map_address != (void *)-1) { | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||||
| LOCK_COMMAND(&alloc_lock); | |||||
| #endif | |||||
| release_info[release_pos].address = map_address; | release_info[release_pos].address = map_address; | ||||
| release_info[release_pos].func = alloc_mmap_free; | release_info[release_pos].func = alloc_mmap_free; | ||||
| release_pos ++; | release_pos ++; | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||||
| UNLOCK_COMMAND(&alloc_lock); | |||||
| #endif | |||||
| } | } | ||||
| UNLOCK_COMMAND(&alloc_lock); | |||||
| return map_address; | return map_address; | ||||
| } | } | ||||
| @@ -2332,7 +2227,7 @@ static void *alloc_qalloc(void *address){ | |||||
| static void alloc_windows_free(struct release_t *release){ | static void alloc_windows_free(struct release_t *release){ | ||||
| VirtualFree(release -> address, 0, MEM_RELEASE); | |||||
| VirtualFree(release -> address, BUFFER_SIZE, MEM_DECOMMIT); | |||||
| } | } | ||||
| @@ -2454,7 +2349,7 @@ static void alloc_hugetlb_free(struct release_t *release){ | |||||
| #ifdef OS_WINDOWS | #ifdef OS_WINDOWS | ||||
| VirtualFree(release -> address, 0, MEM_LARGE_PAGES | MEM_RELEASE); | |||||
| VirtualFree(release -> address, BUFFER_SIZE, MEM_LARGE_PAGES | MEM_DECOMMIT); | |||||
| #endif | #endif | ||||
| @@ -2625,7 +2520,7 @@ void *blas_memory_alloc(int procpos){ | |||||
| int position; | int position; | ||||
| #if defined(WHEREAMI) && !defined(USE_OPENMP) | #if defined(WHEREAMI) && !defined(USE_OPENMP) | ||||
| int mypos = 0; | |||||
| int mypos; | |||||
| #endif | #endif | ||||
| void *map_address; | void *map_address; | ||||
| @@ -2656,11 +2551,6 @@ void *blas_memory_alloc(int procpos){ | |||||
| NULL, | NULL, | ||||
| }; | }; | ||||
| void *(**func)(void *address); | void *(**func)(void *address); | ||||
| #if defined(USE_OPENMP) | |||||
| if (!memory_initialized) { | |||||
| #endif | |||||
| LOCK_COMMAND(&alloc_lock); | LOCK_COMMAND(&alloc_lock); | ||||
| if (!memory_initialized) { | if (!memory_initialized) { | ||||
| @@ -2696,9 +2586,6 @@ void *blas_memory_alloc(int procpos){ | |||||
| } | } | ||||
| UNLOCK_COMMAND(&alloc_lock); | UNLOCK_COMMAND(&alloc_lock); | ||||
| #if defined(USE_OPENMP) | |||||
| } | |||||
| #endif | |||||
| #ifdef DEBUG | #ifdef DEBUG | ||||
| printf("Alloc Start ...\n"); | printf("Alloc Start ...\n"); | ||||
| @@ -2713,17 +2600,13 @@ void *blas_memory_alloc(int procpos){ | |||||
| do { | do { | ||||
| if (!memory[position].used && (memory[position].pos == mypos)) { | if (!memory[position].used && (memory[position].pos == mypos)) { | ||||
| #if defined(SMP) && !defined(USE_OPENMP) | |||||
| LOCK_COMMAND(&alloc_lock); | LOCK_COMMAND(&alloc_lock); | ||||
| #else | |||||
| blas_lock(&memory[position].lock); | |||||
| #endif | |||||
| // blas_lock(&memory[position].lock); | |||||
| if (!memory[position].used) goto allocation; | if (!memory[position].used) goto allocation; | ||||
| #if defined(SMP) && !defined(USE_OPENMP) | |||||
| UNLOCK_COMMAND(&alloc_lock); | UNLOCK_COMMAND(&alloc_lock); | ||||
| #else | |||||
| blas_unlock(&memory[position].lock); | |||||
| #endif | |||||
| // blas_unlock(&memory[position].lock); | |||||
| } | } | ||||
| position ++; | position ++; | ||||
| @@ -2735,26 +2618,21 @@ void *blas_memory_alloc(int procpos){ | |||||
| position = 0; | position = 0; | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||||
| LOCK_COMMAND(&alloc_lock); | LOCK_COMMAND(&alloc_lock); | ||||
| #endif | |||||
| do { | do { | ||||
| #if defined(USE_OPENMP) | |||||
| if (!memory[position].used) { | |||||
| blas_lock(&memory[position].lock); | |||||
| #endif | |||||
| /* if (!memory[position].used) { */ | |||||
| /* blas_lock(&memory[position].lock);*/ | |||||
| if (!memory[position].used) goto allocation; | if (!memory[position].used) goto allocation; | ||||
| #if defined(USE_OPENMP) | |||||
| blas_unlock(&memory[position].lock); | |||||
| } | |||||
| #endif | |||||
| /* blas_unlock(&memory[position].lock);*/ | |||||
| /* } */ | |||||
| position ++; | position ++; | ||||
| } while (position < NUM_BUFFERS); | } while (position < NUM_BUFFERS); | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||||
| UNLOCK_COMMAND(&alloc_lock); | |||||
| #endif | |||||
| UNLOCK_COMMAND(&alloc_lock); | |||||
| goto error; | goto error; | ||||
| allocation : | allocation : | ||||
| @@ -2764,11 +2642,10 @@ void *blas_memory_alloc(int procpos){ | |||||
| #endif | #endif | ||||
| memory[position].used = 1; | memory[position].used = 1; | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||||
| UNLOCK_COMMAND(&alloc_lock); | UNLOCK_COMMAND(&alloc_lock); | ||||
| #else | |||||
| blas_unlock(&memory[position].lock); | |||||
| #endif | |||||
| /* blas_unlock(&memory[position].lock);*/ | |||||
| if (!memory[position].addr) { | if (!memory[position].addr) { | ||||
| do { | do { | ||||
| #ifdef DEBUG | #ifdef DEBUG | ||||
| @@ -2785,7 +2662,7 @@ void *blas_memory_alloc(int procpos){ | |||||
| #ifdef ALLOC_DEVICEDRIVER | #ifdef ALLOC_DEVICEDRIVER | ||||
| if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { | if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { | ||||
| fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n"); | |||||
| fprintf(stderr, "OpenBLAS Warning ... Physically contigous allocation was failed.\n"); | |||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -2813,13 +2690,9 @@ void *blas_memory_alloc(int procpos){ | |||||
| } while ((BLASLONG)map_address == -1); | } while ((BLASLONG)map_address == -1); | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||||
| LOCK_COMMAND(&alloc_lock); | LOCK_COMMAND(&alloc_lock); | ||||
| #endif | |||||
| memory[position].addr = map_address; | memory[position].addr = map_address; | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||||
| UNLOCK_COMMAND(&alloc_lock); | UNLOCK_COMMAND(&alloc_lock); | ||||
| #endif | |||||
| #ifdef DEBUG | #ifdef DEBUG | ||||
| printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position); | printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position); | ||||
| @@ -2873,9 +2746,8 @@ void blas_memory_free(void *free_area){ | |||||
| #endif | #endif | ||||
| position = 0; | position = 0; | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||||
| LOCK_COMMAND(&alloc_lock); | LOCK_COMMAND(&alloc_lock); | ||||
| #endif | |||||
| while ((position < NUM_BUFFERS) && (memory[position].addr != free_area)) | while ((position < NUM_BUFFERS) && (memory[position].addr != free_area)) | ||||
| position++; | position++; | ||||
| @@ -2889,9 +2761,7 @@ void blas_memory_free(void *free_area){ | |||||
| WMB; | WMB; | ||||
| memory[position].used = 0; | memory[position].used = 0; | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||||
| UNLOCK_COMMAND(&alloc_lock); | UNLOCK_COMMAND(&alloc_lock); | ||||
| #endif | |||||
| #ifdef DEBUG | #ifdef DEBUG | ||||
| printf("Unmap Succeeded.\n\n"); | printf("Unmap Succeeded.\n\n"); | ||||
| @@ -2906,9 +2776,8 @@ void blas_memory_free(void *free_area){ | |||||
| for (position = 0; position < NUM_BUFFERS; position++) | for (position = 0; position < NUM_BUFFERS; position++) | ||||
| printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used); | printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used); | ||||
| #endif | #endif | ||||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||||
| UNLOCK_COMMAND(&alloc_lock); | UNLOCK_COMMAND(&alloc_lock); | ||||
| #endif | |||||
| return; | return; | ||||
| } | } | ||||
| @@ -2958,7 +2827,7 @@ void blas_shutdown(void){ | |||||
| #if defined(OS_LINUX) && !defined(NO_WARMUP) | #if defined(OS_LINUX) && !defined(NO_WARMUP) | ||||
| #if defined(SMP) || defined(USE_LOCKING) | |||||
| #ifdef SMP | |||||
| #if defined(USE_PTHREAD_LOCK) | #if defined(USE_PTHREAD_LOCK) | ||||
| static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER; | static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER; | ||||
| #elif defined(USE_PTHREAD_SPINLOCK) | #elif defined(USE_PTHREAD_SPINLOCK) | ||||
| @@ -2983,7 +2852,7 @@ static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, | |||||
| if (hot_alloc != 2) { | if (hot_alloc != 2) { | ||||
| #endif | #endif | ||||
| #if defined(SMP) || defined(USE_LOCKING) | |||||
| #ifdef SMP | |||||
| LOCK_COMMAND(&init_lock); | LOCK_COMMAND(&init_lock); | ||||
| #endif | #endif | ||||
| @@ -2993,7 +2862,7 @@ static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, | |||||
| size -= PAGESIZE; | size -= PAGESIZE; | ||||
| } | } | ||||
| #if defined(SMP) || defined(USE_LOCKING) | |||||
| #ifdef SMP | |||||
| UNLOCK_COMMAND(&init_lock); | UNLOCK_COMMAND(&init_lock); | ||||
| #endif | #endif | ||||
| @@ -3226,7 +3095,7 @@ void gotoblas_dummy_for_PGI(void) { | |||||
| gotoblas_init(); | gotoblas_init(); | ||||
| gotoblas_quit(); | gotoblas_quit(); | ||||
| #if __PGIC__ < 19 | |||||
| #if 0 | #if 0 | ||||
| asm ("\t.section\t.ctors,\"aw\",@progbits; .align 8; .quad gotoblas_init; .section .text"); | asm ("\t.section\t.ctors,\"aw\",@progbits; .align 8; .quad gotoblas_init; .section .text"); | ||||
| asm ("\t.section\t.dtors,\"aw\",@progbits; .align 8; .quad gotoblas_quit; .section .text"); | asm ("\t.section\t.dtors,\"aw\",@progbits; .align 8; .quad gotoblas_quit; .section .text"); | ||||
| @@ -3234,7 +3103,6 @@ void gotoblas_dummy_for_PGI(void) { | |||||
| asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text"); | asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text"); | ||||
| asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text"); | asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text"); | ||||
| #endif | #endif | ||||
| #endif | |||||
| } | } | ||||
| #endif | #endif | ||||