| @@ -5,6 +5,12 @@ Version 0.1 alpha2(in development) | |||
| common: | |||
| * Fixed blasint undefined bug in <cblas.h> file. Other software | |||
| could include this header successfully(Refs issue #13 on github) | |||
| * Fixed the SEGFAULT bug on 64 cores. On an SMP server, the number | |||
| of CPUs or cores must be less than or equal to 64. (Refs issue #14 | |||
| on github) | |||
| * Support "void goto_set_num_threads(int num_threads)" and "void | |||
| openblas_set_num_threads(int num_threads)" when USE_OPENMP=1 | |||
| x86/x86_64: | |||
| * | |||
| MIPS64: | |||
| @@ -39,13 +39,17 @@ export GOTO_NUM_THREADS=4 | |||
| or | |||
| export OMP_NUM_THREADS=4 | |||
| The priorities are OPENBLAS_NUM_THREAD > GOTO_NUM_THREADS > OMP_NUM_THREADS. | |||
| The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS. | |||
| If you compile this lib with USE_OPENMP=1, you should only set OMP_NUM_THREADS environment variable. | |||
| 4.2 Set the number of threads by calling functions. For example, | |||
| void goto_set_num_threads(int num_threads); | |||
| or | |||
| void openblas_set_num_threads(int num_threads); | |||
| If you compile this lib with USE_OPENMP=1, you should use the above functions, too. | |||
| 5.Report Bugs | |||
| Please open an issue at https://github.com/xianyi/OpenBLAS/issues | |||
| @@ -56,4 +60,8 @@ Optimization on ICT Loongson 3A CPU | |||
| OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas | |||
| 8.ChangeLog | |||
| Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version. | |||
| Please see Changelog.txt for the differences from the GotoBLAS2 1.13 BSD version. | |||
| 9.Known Issues | |||
| * The number of CPUs/cores should be less than or equal to 8*sizeof(unsigned long). On 64-bit systems, the limit | |||
| is 64. On 32-bit systems, it is 32. | |||
| @@ -49,6 +49,26 @@ | |||
| int blas_server_avail = 0; | |||
| void goto_set_num_threads(int num_threads) { /* Set the thread count used by this OpenMP build: clamp the request, record it, and hand it to OpenMP. */ | |||
| if (num_threads < 1) num_threads = blas_num_threads; /* a request below 1 falls back to the current blas_num_threads */ | |||
| if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER; /* never exceed the MAX_CPU_NUMBER cap */ | |||
| if (num_threads > blas_num_threads) { /* blas_num_threads is only ever raised here, never lowered */ | |||
| blas_num_threads = num_threads; | |||
| } | |||
| blas_cpu_number = num_threads; /* record the clamped request in blas_cpu_number */ | |||
| omp_set_num_threads(blas_cpu_number); /* forward the same value to the OpenMP runtime */ | |||
| } | |||
| void openblas_set_num_threads(int num_threads) { /* OpenBLAS-named entry point; behaves exactly like goto_set_num_threads. */ | |||
| goto_set_num_threads(num_threads); /* forward the request unchanged */ | |||
| } | |||
| int blas_thread_init(void){ | |||
| blas_get_cpu_number(); | |||
| @@ -172,13 +172,20 @@ static inline int rcount(unsigned long number) { | |||
| return count; | |||
| } | |||
| /*** | |||
| Known issue: The number of CPUs/cores should be less | |||
| than or equal to 8*sizeof(unsigned long). On 64 bits, | |||
| the limit is 64. On 32 bits, it is 32. | |||
| ***/ | |||
| static inline unsigned long get_cpumap(int node) { | |||
| int infile; | |||
| unsigned long affinity; | |||
| char name[160]; | |||
| char cpumap[160]; | |||
| char *p, *dummy; | |||
| int i=0; | |||
| sprintf(name, CPUMAP_NAME, node); | |||
| infile = open(name, O_RDONLY); | |||
| @@ -187,13 +194,19 @@ static inline unsigned long get_cpumap(int node) { | |||
| if (infile != -1) { | |||
| read(infile, name, sizeof(name)); | |||
| read(infile, cpumap, sizeof(cpumap)); | |||
| p = cpumap; | |||
| while (*p != '\n' && i<160){ | |||
| if(*p != ',') { | |||
| name[i++]=*p; | |||
| } | |||
| p++; | |||
| } | |||
| p = name; | |||
| while ((*p == '0') || (*p == ',')) p++; | |||
| // while ((*p == '0') || (*p == ',')) p++; | |||
| affinity = strtol(p, &dummy, 16); | |||
| affinity = strtoul(p, &dummy, 16); | |||
| close(infile); | |||
| } | |||
| @@ -347,7 +360,13 @@ static void disable_hyperthread(void) { | |||
| unsigned long share; | |||
| int cpu; | |||
| common -> avail = (1UL << common -> num_procs) - 1; | |||
| if(common->num_procs > 64){ | |||
| fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs); | |||
| exit(1); | |||
| }else if(common->num_procs == 64){ | |||
| common -> avail = 0xFFFFFFFFFFFFFFFFUL; | |||
| }else | |||
| common -> avail = (1UL << common -> num_procs) - 1; | |||
| #ifdef DEBUG | |||
| fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail); | |||
| @@ -376,7 +395,13 @@ static void disable_affinity(void) { | |||
| fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]); | |||
| #endif | |||
| lprocmask = (1UL << common -> final_num_procs) - 1; | |||
| if(common->final_num_procs > 64){ | |||
| fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs); | |||
| exit(1); | |||
| }else if(common->final_num_procs == 64){ | |||
| lprocmask = 0xFFFFFFFFFFFFFFFFUL; | |||
| }else | |||
| lprocmask = (1UL << common -> final_num_procs) - 1; | |||
| #ifndef USE_OPENMP | |||
| lprocmask &= *(unsigned long *)&cpu_orig_mask[0]; | |||