| @@ -87,28 +87,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #endif | |||
| #endif | |||
| /* Memory buffer must fit two matrix subblocks of maximal size */ | |||
| #if BUFFER_SIZE < (SGEMM_DEFAULT_P * SGEMM_DEFAULT_Q * 4 * 2) || \ | |||
| BUFFER_SIZE < (SGEMM_DEFAULT_P * SGEMM_DEFAULT_R * 4 * 2) || \ | |||
| BUFFER_SIZE < (SGEMM_DEFAULT_R * SGEMM_DEFAULT_Q * 4 * 2) | |||
| #error BUFFER_SIZE is too small for P, Q, and R of SGEMM | |||
| #endif | |||
| #if BUFFER_SIZE < (DGEMM_DEFAULT_P * DGEMM_DEFAULT_Q * 8 * 2) || \ | |||
| BUFFER_SIZE < (DGEMM_DEFAULT_P * DGEMM_DEFAULT_R * 8 * 2) || \ | |||
| BUFFER_SIZE < (DGEMM_DEFAULT_R * DGEMM_DEFAULT_Q * 8 * 2) | |||
| #error BUFFER_SIZE is too small for P, Q, and R of DGEMM | |||
| #endif | |||
| #if BUFFER_SIZE < (CGEMM_DEFAULT_P * CGEMM_DEFAULT_Q * 8 * 2) || \ | |||
| BUFFER_SIZE < (CGEMM_DEFAULT_P * CGEMM_DEFAULT_R * 8 * 2) || \ | |||
| BUFFER_SIZE < (CGEMM_DEFAULT_R * CGEMM_DEFAULT_Q * 8 * 2) | |||
| #error BUFFER_SIZE is too small for P, Q, and R of CGEMM | |||
| #endif | |||
| #if BUFFER_SIZE < (ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_Q * 16 * 2) || \ | |||
| BUFFER_SIZE < (ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_R * 16 * 2) || \ | |||
| BUFFER_SIZE < (ZGEMM_DEFAULT_R * ZGEMM_DEFAULT_Q * 16 * 2) | |||
| #error BUFFER_SIZE is too small for P, Q, and R of ZGEMM | |||
| #endif | |||
| #if defined(COMPILE_TLS) | |||
| #include <errno.h> | |||
| @@ -151,7 +129,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include <unistd.h> | |||
| #endif | |||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) | |||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) | |||
| #include <sys/sysctl.h> | |||
| #include <sys/resource.h> | |||
| #endif | |||
| @@ -214,74 +192,51 @@ void goto_set_num_threads(int num_threads) {}; | |||
| #else | |||
| #if defined(OS_LINUX) || defined(OS_SUNOS) | |||
| #if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD) | |||
| #ifndef NO_AFFINITY | |||
| int get_num_procs(void); | |||
| #else | |||
| int get_num_procs(void) { /* Returns a processor count that is kept in the static nums; takes no arguments. */ | |||
| static int nums = 0; /* NOTE(review): rows from two versions of this function appear interleaved below (diff markers seem to be missing) - confirm which rows are live */ | |||
| cpu_set_t cpuset,*cpusetp; | |||
| size_t size; | |||
| int ret; | |||
| #if defined(__GLIBC_PREREQ) | |||
| #if !__GLIBC_PREREQ(2, 7) | |||
| int i; | |||
| #if !__GLIBC_PREREQ(2, 6) | |||
| int n; | |||
| #endif | |||
| #endif | |||
| #endif | |||
| cpu_set_t *cpusetp; | |||
| size_t size; | |||
| int ret; | |||
| int i,n; | |||
| if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); /* sysconf is consulted only while nums is still 0 */ | |||
| #if !defined(OS_LINUX) | |||
| return nums; /* non-Linux builds stop at the sysconf value */ | |||
| return nums; | |||
| #endif | |||
| #if !defined(__GLIBC_PREREQ) | |||
| return nums; /* no glibc version macro: the affinity code below is not attempted */ | |||
| return nums; | |||
| #else | |||
| #if !__GLIBC_PREREQ(2, 3) | |||
| return nums; | |||
| return nums; | |||
| #endif | |||
| #if !__GLIBC_PREREQ(2, 7) | |||
| ret = sched_getaffinity(0,sizeof(cpuset), &cpuset); | |||
| ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); /* NOTE(review): cpusetp has no visible assignment on this path before it is used - confirm */ | |||
| if (ret!=0) return nums; /* affinity query failed: keep the current value */ | |||
| n=0; | |||
| #if !__GLIBC_PREREQ(2, 6) | |||
| for (i=0;i<nums;i++) /* count the CPUs set in the affinity mask by hand */ | |||
| if (CPU_ISSET(i,&cpuset)) n++; | |||
| if (CPU_ISSET(i,cpusetp)) n++; | |||
| nums=n; | |||
| #else | |||
| nums = CPU_COUNT(sizeof(cpuset),&cpuset); | |||
| nums = CPU_COUNT(sizeof(cpu_set_t),cpusetp); | |||
| #endif | |||
| return nums; | |||
| #else | |||
| if (nums >= CPU_SETSIZE) { /* at or beyond CPU_SETSIZE a dynamically sized set is allocated instead of using cpuset */ | |||
| cpusetp = CPU_ALLOC(nums); | |||
| if (cpusetp == NULL) { | |||
| return nums; | |||
| } | |||
| size = CPU_ALLOC_SIZE(nums); | |||
| ret = sched_getaffinity(0,size,cpusetp); | |||
| if (ret!=0) { | |||
| CPU_FREE(cpusetp); | |||
| return nums; | |||
| } | |||
| ret = CPU_COUNT_S(size,cpusetp); | |||
| if (ret > 0 && ret < nums) nums = ret; /* nums is only ever lowered here, and a zero count is ignored */ | |||
| CPU_FREE(cpusetp); | |||
| return nums; | |||
| } else { | |||
| ret = sched_getaffinity(0,sizeof(cpuset),&cpuset); | |||
| if (ret!=0) { | |||
| return nums; | |||
| } | |||
| ret = CPU_COUNT(&cpuset); | |||
| if (ret > 0 && ret < nums) nums = ret; | |||
| return nums; | |||
| } | |||
| cpusetp = CPU_ALLOC(nums); | |||
| if (cpusetp == NULL) return nums; | |||
| size = CPU_ALLOC_SIZE(nums); | |||
| ret = sched_getaffinity(0,size,cpusetp); | |||
| if (ret!=0) return nums; /* NOTE(review): this early return has no CPU_FREE(cpusetp) for the CPU_ALLOC three rows up - confirm */ | |||
| ret = CPU_COUNT_S(size,cpusetp); | |||
| if (ret > 0 && ret < nums) nums = ret; | |||
| CPU_FREE(cpusetp); | |||
| return nums; | |||
| #endif | |||
| #endif | |||
| } | |||
| @@ -334,7 +289,7 @@ int get_num_procs(void) { | |||
| #endif | |||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) | |||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) | |||
| int get_num_procs(void) { | |||
| @@ -426,7 +381,7 @@ extern int openblas_goto_num_threads_env(); | |||
| extern int openblas_omp_num_threads_env(); | |||
| int blas_get_cpu_number(void){ | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| int max_num; | |||
| #endif | |||
| int blas_goto_num = 0; | |||
| @@ -434,7 +389,7 @@ int blas_get_cpu_number(void){ | |||
| if (blas_num_threads) return blas_num_threads; | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| max_num = get_num_procs(); | |||
| #endif | |||
| @@ -458,7 +413,7 @@ int blas_get_cpu_number(void){ | |||
| else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; | |||
| else blas_num_threads = MAX_CPU_NUMBER; | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| if (blas_num_threads > max_num) blas_num_threads = max_num; | |||
| #endif | |||
| @@ -844,7 +799,7 @@ static void *alloc_qalloc(void *address){ | |||
| static void alloc_windows_free(struct alloc_t *alloc_info){ /* Gives the block at alloc_info back to the OS through VirtualFree; returns nothing. */ | |||
| VirtualFree(alloc_info, 0, MEM_RELEASE); /* size argument is 0 together with MEM_RELEASE */ | |||
| VirtualFree(alloc_info, allocation_block_size, MEM_DECOMMIT); /* NOTE(review): looks like the alternative row of the same call (decommit of allocation_block_size bytes) - confirm which one is live */ | |||
| } | |||
| @@ -957,7 +912,7 @@ static void alloc_hugetlb_free(struct alloc_t *alloc_info){ | |||
| #ifdef OS_WINDOWS | |||
| VirtualFree(alloc_info, 0, MEM_LARGE_PAGES | MEM_RELEASE); | |||
| VirtualFree(alloc_info, allocation_block_size, MEM_LARGE_PAGES | MEM_DECOMMIT); | |||
| #endif | |||
| @@ -1118,6 +1073,11 @@ static volatile int memory_initialized = 0; | |||
| } | |||
| free(table); | |||
| } | |||
| #if defined(OS_WINDOWS) | |||
| TlsFree(local_storage_key); | |||
| #else | |||
| pthread_key_delete(local_storage_key); | |||
| #endif | |||
| } | |||
| static void blas_memory_init(){ | |||
| @@ -1335,13 +1295,6 @@ void blas_memory_free_nolock(void * map_address) { | |||
| free(map_address); | |||
| } | |||
| #ifdef SMP | |||
| void blas_thread_memory_cleanup(void) { /* Passes the table returned by get_memory_table() to blas_memory_cleanup(); compiled only when SMP is defined. */ | |||
| blas_memory_cleanup((void*)get_memory_table()); | |||
| } | |||
| #endif | |||
| void blas_shutdown(void){ | |||
| #ifdef SMP | |||
| BLASFUNC(blas_thread_shutdown)(); | |||
| @@ -1351,7 +1304,7 @@ void blas_shutdown(void){ | |||
| /* Only clean up if we were built for threading and TLS was initialized */ | |||
| if (local_storage_key) | |||
| #endif | |||
| blas_thread_memory_cleanup(); | |||
| blas_memory_cleanup((void*)get_memory_table()); | |||
| #ifdef SEEK_ADDRESS | |||
| base_address = 0UL; | |||
| @@ -1538,14 +1491,6 @@ void DESTRUCTOR gotoblas_quit(void) { | |||
| blas_shutdown(); | |||
| #if defined(SMP) | |||
| #if defined(OS_WINDOWS) | |||
| TlsFree(local_storage_key); | |||
| #else | |||
| pthread_key_delete(local_storage_key); | |||
| #endif | |||
| #endif | |||
| #ifdef PROFILE | |||
| moncontrol (0); | |||
| #endif | |||
| @@ -1581,7 +1526,7 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReser | |||
| break; | |||
| case DLL_THREAD_DETACH: | |||
| #if defined(SMP) | |||
| blas_thread_memory_cleanup(); | |||
| blas_memory_cleanup((void*)get_memory_table()); | |||
| #endif | |||
| break; | |||
| case DLL_PROCESS_DETACH: | |||
| @@ -1644,7 +1589,6 @@ void gotoblas_dummy_for_PGI(void) { | |||
| gotoblas_init(); | |||
| gotoblas_quit(); | |||
| #if __PGIC__ < 19 | |||
| #if 0 | |||
| asm ("\t.section\t.ctors,\"aw\",@progbits; .align 8; .quad gotoblas_init; .section .text"); | |||
| asm ("\t.section\t.dtors,\"aw\",@progbits; .align 8; .quad gotoblas_quit; .section .text"); | |||
| @@ -1652,16 +1596,13 @@ void gotoblas_dummy_for_PGI(void) { | |||
| asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text"); | |||
| asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text"); | |||
| #endif | |||
| #endif | |||
| } | |||
| #endif | |||
| #else | |||
| /* USE_TLS / COMPILE_TLS not set */ | |||
| #include <errno.h> | |||
| #if defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT) | |||
| #ifdef OS_WINDOWS | |||
| #define ALLOC_WINDOWS | |||
| #ifndef MEM_LARGE_PAGES | |||
| #define MEM_LARGE_PAGES 0x20000000 | |||
| @@ -1675,7 +1616,7 @@ void gotoblas_dummy_for_PGI(void) { | |||
| #include <stdio.h> | |||
| #include <fcntl.h> | |||
| #if !defined(OS_WINDOWS) || defined(OS_CYGWIN_NT) | |||
| #ifndef OS_WINDOWS | |||
| #include <sys/mman.h> | |||
| #ifndef NO_SYSV_IPC | |||
| #include <sys/shm.h> | |||
| @@ -1695,7 +1636,7 @@ void gotoblas_dummy_for_PGI(void) { | |||
| #include <sys/resource.h> | |||
| #endif | |||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) | |||
| #if defined(OS_FREEBSD) || defined(OS_DARWIN) | |||
| #include <sys/sysctl.h> | |||
| #include <sys/resource.h> | |||
| #endif | |||
| @@ -1734,12 +1675,9 @@ void gotoblas_dummy_for_PGI(void) { | |||
| #elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC) | |||
| #define CONSTRUCTOR __attribute__ ((constructor)) | |||
| #define DESTRUCTOR __attribute__ ((destructor)) | |||
| #elif __GNUC__ && INIT_PRIORITY && ((GCC_VERSION >= 40300) || (CLANG_VERSION >= 20900)) | |||
| #else | |||
| #define CONSTRUCTOR __attribute__ ((constructor(101))) | |||
| #define DESTRUCTOR __attribute__ ((destructor(101))) | |||
| #else | |||
| #define CONSTRUCTOR __attribute__ ((constructor)) | |||
| #define DESTRUCTOR __attribute__ ((destructor)) | |||
| #endif | |||
| #ifdef DYNAMIC_ARCH | |||
| @@ -1758,75 +1696,50 @@ void goto_set_num_threads(int num_threads) {}; | |||
| #else | |||
| #if defined(OS_LINUX) || defined(OS_SUNOS) | |||
| #if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD) | |||
| #ifndef NO_AFFINITY | |||
| int get_num_procs(void); | |||
| #else | |||
| int get_num_procs(void) { /* Reports a processor count stored in the static nums; no parameters. */ | |||
| static int nums = 0; /* NOTE(review): two versions of this function seem to be interleaved row by row below (diff markers missing) - confirm which rows apply */ | |||
| cpu_set_t cpuset,*cpusetp; | |||
| size_t size; | |||
| int ret; | |||
| #if defined(__GLIBC_PREREQ) | |||
| #if !__GLIBC_PREREQ(2, 7) | |||
| int i; | |||
| #if !__GLIBC_PREREQ(2, 6) | |||
| int n; | |||
| #endif | |||
| #endif | |||
| #endif | |||
| cpu_set_t *cpusetp; | |||
| size_t size; | |||
| int ret; | |||
| int i,n; | |||
| if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); /* the configured-processor query runs only while nums is 0 */ | |||
| #if !defined(OS_LINUX) | |||
| return nums; /* targets other than Linux return the sysconf value as is */ | |||
| return nums; | |||
| #endif | |||
| #if !defined(__GLIBC_PREREQ) | |||
| return nums; /* without __GLIBC_PREREQ the affinity mask is never consulted */ | |||
| return nums; | |||
| #else | |||
| #if !__GLIBC_PREREQ(2, 3) | |||
| return nums; | |||
| return nums; | |||
| #endif | |||
| #if !__GLIBC_PREREQ(2, 7) | |||
| ret = sched_getaffinity(0,sizeof(cpuset), &cpuset); | |||
| ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); /* NOTE(review): no assignment to cpusetp is visible on this path before this use - confirm */ | |||
| if (ret!=0) return nums; /* keep the current value when the affinity query fails */ | |||
| n=0; | |||
| #if !__GLIBC_PREREQ(2, 6) | |||
| for (i=0;i<nums;i++) /* manual tally of the CPUs set in the mask */ | |||
| if (CPU_ISSET(i,&cpuset)) n++; | |||
| if (CPU_ISSET(i,cpusetp)) n++; | |||
| nums=n; | |||
| #else | |||
| nums = CPU_COUNT(sizeof(cpuset),&cpuset); | |||
| nums = CPU_COUNT(sizeof(cpu_set_t),cpusetp); | |||
| #endif | |||
| return nums; | |||
| #else | |||
| if (nums >= CPU_SETSIZE) { /* from CPU_SETSIZE upwards the set is allocated with CPU_ALLOC rather than taken from cpuset */ | |||
| cpusetp = CPU_ALLOC(nums); | |||
| if (cpusetp == NULL) { | |||
| return nums; | |||
| } | |||
| size = CPU_ALLOC_SIZE(nums); | |||
| ret = sched_getaffinity(0,size,cpusetp); | |||
| if (ret!=0) { | |||
| CPU_FREE(cpusetp); | |||
| return nums; | |||
| } | |||
| ret = CPU_COUNT_S(size,cpusetp); | |||
| if (ret > 0 && ret < nums) nums = ret; /* a zero count is ignored and nums can only shrink */ | |||
| CPU_FREE(cpusetp); | |||
| return nums; | |||
| } else { | |||
| ret = sched_getaffinity(0,sizeof(cpuset),&cpuset); | |||
| if (ret!=0) { | |||
| return nums; | |||
| } | |||
| ret = CPU_COUNT(&cpuset); | |||
| if (ret > 0 && ret < nums) nums = ret; | |||
| return nums; | |||
| } | |||
| cpusetp = CPU_ALLOC(nums); | |||
| if (cpusetp == NULL) return nums; | |||
| size = CPU_ALLOC_SIZE(nums); | |||
| ret = sched_getaffinity(0,size,cpusetp); | |||
| if (ret!=0) return nums; /* NOTE(review): returns without CPU_FREE(cpusetp) after the CPU_ALLOC three rows up - confirm */ | |||
| nums = CPU_COUNT_S(size,cpusetp); /* NOTE(review): unguarded assignment - a count of 0 would be stored in nums, unlike the guarded rows earlier in this function - confirm */ | |||
| CPU_FREE(cpusetp); | |||
| return nums; | |||
| #endif | |||
| #endif | |||
| } | |||
| @@ -1840,7 +1753,7 @@ int get_num_procs(void) { | |||
| return nums; | |||
| } | |||
| #endif | |||
| #ifdef OS_HAIKU | |||
| int get_num_procs(void) { | |||
| static int nums = 0; | |||
| @@ -1877,7 +1790,7 @@ int get_num_procs(void) { | |||
| #endif | |||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) | |||
| #if defined(OS_FREEBSD) | |||
| int get_num_procs(void) { | |||
| @@ -1954,7 +1867,7 @@ void openblas_fork_handler() | |||
| // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035 | |||
| // In the meantime, build with USE_OPENMP=0 or link against another | |||
| // implementation of OpenMP. | |||
| #if !((defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)) || defined(OS_ANDROID)) && defined(SMP_SERVER) | |||
| #if !(defined(OS_WINDOWS) || defined(OS_ANDROID)) && defined(SMP_SERVER) | |||
| int err; | |||
| err = pthread_atfork ((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, NULL); | |||
| if(err != 0) | |||
| @@ -1967,7 +1880,7 @@ extern int openblas_goto_num_threads_env(); | |||
| extern int openblas_omp_num_threads_env(); | |||
| int blas_get_cpu_number(void){ | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| int max_num; | |||
| #endif | |||
| int blas_goto_num = 0; | |||
| @@ -1975,11 +1888,11 @@ int blas_get_cpu_number(void){ | |||
| if (blas_num_threads) return blas_num_threads; | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| max_num = get_num_procs(); | |||
| #endif | |||
| // blas_goto_num = 0; | |||
| blas_goto_num = 0; | |||
| #ifndef USE_OPENMP | |||
| blas_goto_num=openblas_num_threads_env(); | |||
| if (blas_goto_num < 0) blas_goto_num = 0; | |||
| @@ -1991,7 +1904,7 @@ int blas_get_cpu_number(void){ | |||
| #endif | |||
| // blas_omp_num = 0; | |||
| blas_omp_num = 0; | |||
| blas_omp_num=openblas_omp_num_threads_env(); | |||
| if (blas_omp_num < 0) blas_omp_num = 0; | |||
| @@ -1999,7 +1912,7 @@ int blas_get_cpu_number(void){ | |||
| else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; | |||
| else blas_num_threads = MAX_CPU_NUMBER; | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| if (blas_num_threads > max_num) blas_num_threads = max_num; | |||
| #endif | |||
| @@ -2063,12 +1976,8 @@ static BLASULONG alloc_lock = 0UL; | |||
| static void alloc_mmap_free(struct release_t *release){ /* Unmaps BUFFER_SIZE bytes at release->address and prints a message if munmap fails; returns nothing. */ | |||
| if (!release->address) return; /* no address recorded, so there is nothing to unmap */ | |||
| if (munmap(release -> address, BUFFER_SIZE)) { | |||
| int errsv=errno; /* saved before perror/printf get a chance to change errno */ | |||
| perror("OpenBLAS : munmap failed:"); | |||
| printf("error code=%d,\trelease->address=%lx\n",errsv,release->address); /* NOTE(review): %lx is paired with release->address, presumably a pointer (it is passed to munmap) - %p would match; confirm */ | |||
| printf("OpenBLAS : munmap failed\n"); | |||
| } | |||
| } | |||
| @@ -2090,21 +1999,11 @@ static void *alloc_mmap(void *address){ | |||
| } | |||
| if (map_address != (void *)-1) { | |||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||
| LOCK_COMMAND(&alloc_lock); | |||
| #endif | |||
| release_info[release_pos].address = map_address; | |||
| release_info[release_pos].func = alloc_mmap_free; | |||
| release_pos ++; | |||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||
| UNLOCK_COMMAND(&alloc_lock); | |||
| #endif | |||
| } else { | |||
| #ifdef DEBUG | |||
| int errsv=errno; | |||
| perror("OpenBLAS : mmap failed:"); | |||
| printf("error code=%d,\tmap_address=%lx\n",errsv,map_address); | |||
| #endif | |||
| } | |||
| #ifdef OS_LINUX | |||
| @@ -2246,18 +2145,14 @@ static void *alloc_mmap(void *address){ | |||
| #if defined(OS_LINUX) && !defined(NO_WARMUP) | |||
| } | |||
| #endif | |||
| LOCK_COMMAND(&alloc_lock); | |||
| if (map_address != (void *)-1) { | |||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||
| LOCK_COMMAND(&alloc_lock); | |||
| #endif | |||
| release_info[release_pos].address = map_address; | |||
| release_info[release_pos].func = alloc_mmap_free; | |||
| release_pos ++; | |||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||
| UNLOCK_COMMAND(&alloc_lock); | |||
| #endif | |||
| } | |||
| UNLOCK_COMMAND(&alloc_lock); | |||
| return map_address; | |||
| } | |||
| @@ -2332,7 +2227,7 @@ static void *alloc_qalloc(void *address){ | |||
| static void alloc_windows_free(struct release_t *release){ /* Frees the region at release->address through VirtualFree; returns nothing. */ | |||
| VirtualFree(release -> address, 0, MEM_RELEASE); /* size argument is 0 together with MEM_RELEASE */ | |||
| VirtualFree(release -> address, BUFFER_SIZE, MEM_DECOMMIT); /* NOTE(review): looks like the alternative row of the same call (decommit of BUFFER_SIZE bytes) - confirm which one is live */ | |||
| } | |||
| @@ -2454,7 +2349,7 @@ static void alloc_hugetlb_free(struct release_t *release){ | |||
| #ifdef OS_WINDOWS | |||
| VirtualFree(release -> address, 0, MEM_LARGE_PAGES | MEM_RELEASE); | |||
| VirtualFree(release -> address, BUFFER_SIZE, MEM_LARGE_PAGES | MEM_DECOMMIT); | |||
| #endif | |||
| @@ -2625,7 +2520,7 @@ void *blas_memory_alloc(int procpos){ | |||
| int position; | |||
| #if defined(WHEREAMI) && !defined(USE_OPENMP) | |||
| int mypos = 0; | |||
| int mypos; | |||
| #endif | |||
| void *map_address; | |||
| @@ -2656,11 +2551,6 @@ void *blas_memory_alloc(int procpos){ | |||
| NULL, | |||
| }; | |||
| void *(**func)(void *address); | |||
| #if defined(USE_OPENMP) | |||
| if (!memory_initialized) { | |||
| #endif | |||
| LOCK_COMMAND(&alloc_lock); | |||
| if (!memory_initialized) { | |||
| @@ -2696,9 +2586,6 @@ void *blas_memory_alloc(int procpos){ | |||
| } | |||
| UNLOCK_COMMAND(&alloc_lock); | |||
| #if defined(USE_OPENMP) | |||
| } | |||
| #endif | |||
| #ifdef DEBUG | |||
| printf("Alloc Start ...\n"); | |||
| @@ -2713,17 +2600,13 @@ void *blas_memory_alloc(int procpos){ | |||
| do { | |||
| if (!memory[position].used && (memory[position].pos == mypos)) { | |||
| #if defined(SMP) && !defined(USE_OPENMP) | |||
| LOCK_COMMAND(&alloc_lock); | |||
| #else | |||
| blas_lock(&memory[position].lock); | |||
| #endif | |||
| // blas_lock(&memory[position].lock); | |||
| if (!memory[position].used) goto allocation; | |||
| #if defined(SMP) && !defined(USE_OPENMP) | |||
| UNLOCK_COMMAND(&alloc_lock); | |||
| #else | |||
| blas_unlock(&memory[position].lock); | |||
| #endif | |||
| // blas_unlock(&memory[position].lock); | |||
| } | |||
| position ++; | |||
| @@ -2735,26 +2618,21 @@ void *blas_memory_alloc(int procpos){ | |||
| position = 0; | |||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||
| LOCK_COMMAND(&alloc_lock); | |||
| #endif | |||
| do { | |||
| #if defined(USE_OPENMP) | |||
| if (!memory[position].used) { | |||
| blas_lock(&memory[position].lock); | |||
| #endif | |||
| /* if (!memory[position].used) { */ | |||
| /* blas_lock(&memory[position].lock);*/ | |||
| if (!memory[position].used) goto allocation; | |||
| #if defined(USE_OPENMP) | |||
| blas_unlock(&memory[position].lock); | |||
| } | |||
| #endif | |||
| /* blas_unlock(&memory[position].lock);*/ | |||
| /* } */ | |||
| position ++; | |||
| } while (position < NUM_BUFFERS); | |||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||
| UNLOCK_COMMAND(&alloc_lock); | |||
| #endif | |||
| UNLOCK_COMMAND(&alloc_lock); | |||
| goto error; | |||
| allocation : | |||
| @@ -2764,11 +2642,10 @@ void *blas_memory_alloc(int procpos){ | |||
| #endif | |||
| memory[position].used = 1; | |||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||
| UNLOCK_COMMAND(&alloc_lock); | |||
| #else | |||
| blas_unlock(&memory[position].lock); | |||
| #endif | |||
| /* blas_unlock(&memory[position].lock);*/ | |||
| if (!memory[position].addr) { | |||
| do { | |||
| #ifdef DEBUG | |||
| @@ -2785,7 +2662,7 @@ void *blas_memory_alloc(int procpos){ | |||
| #ifdef ALLOC_DEVICEDRIVER | |||
| if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { | |||
| fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n"); | |||
| fprintf(stderr, "OpenBLAS Warning ... Physically contigous allocation was failed.\n"); | |||
| } | |||
| #endif | |||
| @@ -2813,13 +2690,9 @@ void *blas_memory_alloc(int procpos){ | |||
| } while ((BLASLONG)map_address == -1); | |||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||
| LOCK_COMMAND(&alloc_lock); | |||
| #endif | |||
| memory[position].addr = map_address; | |||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||
| UNLOCK_COMMAND(&alloc_lock); | |||
| #endif | |||
| #ifdef DEBUG | |||
| printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position); | |||
| @@ -2873,9 +2746,8 @@ void blas_memory_free(void *free_area){ | |||
| #endif | |||
| position = 0; | |||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||
| LOCK_COMMAND(&alloc_lock); | |||
| #endif | |||
| while ((position < NUM_BUFFERS) && (memory[position].addr != free_area)) | |||
| position++; | |||
| @@ -2889,9 +2761,7 @@ void blas_memory_free(void *free_area){ | |||
| WMB; | |||
| memory[position].used = 0; | |||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||
| UNLOCK_COMMAND(&alloc_lock); | |||
| #endif | |||
| #ifdef DEBUG | |||
| printf("Unmap Succeeded.\n\n"); | |||
| @@ -2906,9 +2776,8 @@ void blas_memory_free(void *free_area){ | |||
| for (position = 0; position < NUM_BUFFERS; position++) | |||
| printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used); | |||
| #endif | |||
| #if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) | |||
| UNLOCK_COMMAND(&alloc_lock); | |||
| #endif | |||
| return; | |||
| } | |||
| @@ -2958,7 +2827,7 @@ void blas_shutdown(void){ | |||
| #if defined(OS_LINUX) && !defined(NO_WARMUP) | |||
| #if defined(SMP) || defined(USE_LOCKING) | |||
| #ifdef SMP | |||
| #if defined(USE_PTHREAD_LOCK) | |||
| static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER; | |||
| #elif defined(USE_PTHREAD_SPINLOCK) | |||
| @@ -2983,7 +2852,7 @@ static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, | |||
| if (hot_alloc != 2) { | |||
| #endif | |||
| #if defined(SMP) || defined(USE_LOCKING) | |||
| #ifdef SMP | |||
| LOCK_COMMAND(&init_lock); | |||
| #endif | |||
| @@ -2993,7 +2862,7 @@ static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, | |||
| size -= PAGESIZE; | |||
| } | |||
| #if defined(SMP) || defined(USE_LOCKING) | |||
| #ifdef SMP | |||
| UNLOCK_COMMAND(&init_lock); | |||
| #endif | |||
| @@ -3226,7 +3095,7 @@ void gotoblas_dummy_for_PGI(void) { | |||
| gotoblas_init(); | |||
| gotoblas_quit(); | |||
| #if __PGIC__ < 19 | |||
| #if 0 | |||
| asm ("\t.section\t.ctors,\"aw\",@progbits; .align 8; .quad gotoblas_init; .section .text"); | |||
| asm ("\t.section\t.dtors,\"aw\",@progbits; .align 8; .quad gotoblas_quit; .section .text"); | |||
| @@ -3234,7 +3103,6 @@ void gotoblas_dummy_for_PGI(void) { | |||
| asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text"); | |||
| asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text"); | |||
| #endif | |||
| #endif | |||
| } | |||
| #endif | |||