Browse Source

Merge pull request #1618 from oon3m0oo/less_locking

Remove the need for most locking in memory.c.
tags/v0.3.1
Martin Kroeker GitHub 7 years ago
parent
commit
12603b7dbb
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 43 additions and 156 deletions
  1. +43
    -156
      driver/others/memory.c

+ 43
- 156
driver/others/memory.c View File

@@ -13,9 +13,9 @@ met:
notice, this list of conditions and the following disclaimer in notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the the documentation and/or other materials provided with the
distribution. distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission. permission.


THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
@@ -139,6 +139,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define FIXED_PAGESIZE 4096 #define FIXED_PAGESIZE 4096
#endif #endif


#ifndef BUFFERS_PER_THREAD
#ifdef USE_OPENMP
#define BUFFERS_PER_THREAD (MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER)
#else
#define BUFFERS_PER_THREAD NUM_BUFFERS
#endif
#endif

#define BITMASK(a, b, c) ((((a) >> (b)) & (c))) #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))


#if defined(_MSC_VER) && !defined(__clang__) #if defined(_MSC_VER) && !defined(__clang__)
@@ -213,7 +221,7 @@ int i,n;
ret = sched_getaffinity(0,size,cpusetp); ret = sched_getaffinity(0,size,cpusetp);
if (ret!=0) return nums; if (ret!=0) return nums;
ret = CPU_COUNT_S(size,cpusetp); ret = CPU_COUNT_S(size,cpusetp);
if (ret > 0 && ret < nums) nums = ret;
if (ret > 0 && ret < nums) nums = ret;
CPU_FREE(cpusetp); CPU_FREE(cpusetp);
return nums; return nums;
#endif #endif
@@ -415,8 +423,15 @@ struct release_t {


int hugetlb_allocated = 0; int hugetlb_allocated = 0;


static struct release_t release_info[NUM_BUFFERS];
static int release_pos = 0;
#if defined(OS_WINDOWS)
#define THREAD_LOCAL __declspec(thread)
#define UNLIKELY_TO_BE_ZERO(x) (x)
#else
#define THREAD_LOCAL __thread
#define UNLIKELY_TO_BE_ZERO(x) (__builtin_expect(x, 0))
#endif
static struct release_t THREAD_LOCAL release_info[BUFFERS_PER_THREAD];
static int THREAD_LOCAL release_pos = 0;


#if defined(OS_LINUX) && !defined(NO_WARMUP) #if defined(OS_LINUX) && !defined(NO_WARMUP)
static int hot_alloc = 0; static int hot_alloc = 0;
@@ -459,15 +474,9 @@ static void *alloc_mmap(void *address){
} }


if (map_address != (void *)-1) { if (map_address != (void *)-1) {
#if defined(SMP) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#endif
release_info[release_pos].address = map_address; release_info[release_pos].address = map_address;
release_info[release_pos].func = alloc_mmap_free; release_info[release_pos].func = alloc_mmap_free;
release_pos ++; release_pos ++;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#endif
} }


#ifdef OS_LINUX #ifdef OS_LINUX
@@ -611,15 +620,9 @@ static void *alloc_mmap(void *address){
#endif #endif


if (map_address != (void *)-1) { if (map_address != (void *)-1) {
#if defined(SMP) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#endif
release_info[release_pos].address = map_address; release_info[release_pos].address = map_address;
release_info[release_pos].func = alloc_mmap_free; release_info[release_pos].func = alloc_mmap_free;
release_pos ++; release_pos ++;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#endif
} }


return map_address; return map_address;
@@ -872,7 +875,7 @@ static void *alloc_hugetlb(void *address){


tp.PrivilegeCount = 1; tp.PrivilegeCount = 1;
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) { if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) {
CloseHandle(hToken); CloseHandle(hToken);
return (void*)-1; return (void*)-1;
@@ -961,20 +964,17 @@ static BLASULONG base_address = 0UL;
static BLASULONG base_address = BASE_ADDRESS; static BLASULONG base_address = BASE_ADDRESS;
#endif #endif


static volatile struct {
BLASULONG lock;
struct memory_t {
void *addr; void *addr;
#if defined(WHEREAMI) && !defined(USE_OPENMP)
int pos;
#endif
int used; int used;
#ifndef __64BIT__ #ifndef __64BIT__
char dummy[48]; char dummy[48];
#else #else
char dummy[40]; char dummy[40];
#endif #endif
};


} memory[NUM_BUFFERS];
static struct memory_t THREAD_LOCAL memory[BUFFERS_PER_THREAD];


static int memory_initialized = 0; static int memory_initialized = 0;


@@ -987,9 +987,6 @@ static int memory_initialized = 0;
void *blas_memory_alloc(int procpos){ void *blas_memory_alloc(int procpos){


int position; int position;
#if defined(WHEREAMI) && !defined(USE_OPENMP)
int mypos;
#endif


void *map_address; void *map_address;


@@ -1020,102 +1017,48 @@ void *blas_memory_alloc(int procpos){
}; };
void *(**func)(void *address); void *(**func)(void *address);


#if defined(USE_OPENMP)
if (!memory_initialized) {
#endif

LOCK_COMMAND(&alloc_lock);
if (UNLIKELY_TO_BE_ZERO(memory_initialized)) {


if (!memory_initialized) {
/* Only allow a single thread to initialize memory system */
LOCK_COMMAND(&alloc_lock);


#if defined(WHEREAMI) && !defined(USE_OPENMP)
for (position = 0; position < NUM_BUFFERS; position ++){
memory[position].addr = (void *)0;
memory[position].pos = -1;
memory[position].used = 0;
memory[position].lock = 0;
}
#endif
if (!memory_initialized) {


#ifdef DYNAMIC_ARCH #ifdef DYNAMIC_ARCH
gotoblas_dynamic_init();
gotoblas_dynamic_init();
#endif #endif


#if defined(SMP) && defined(OS_LINUX) && !defined(NO_AFFINITY) #if defined(SMP) && defined(OS_LINUX) && !defined(NO_AFFINITY)
gotoblas_affinity_init();
gotoblas_affinity_init();
#endif #endif


#ifdef SMP #ifdef SMP
if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number();
if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number();
#endif #endif


#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) #if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)
#ifndef DYNAMIC_ARCH #ifndef DYNAMIC_ARCH
blas_set_parameter();
blas_set_parameter();
#endif #endif
#endif #endif


memory_initialized = 1;
memory_initialized = 1;


}
UNLOCK_COMMAND(&alloc_lock);
} }
UNLOCK_COMMAND(&alloc_lock);
#if defined(USE_OPENMP)
}
#endif


#ifdef DEBUG #ifdef DEBUG
printf("Alloc Start ...\n"); printf("Alloc Start ...\n");
#endif

#if defined(WHEREAMI) && !defined(USE_OPENMP)

mypos = WhereAmI();

position = mypos;
while (position >= NUM_BUFFERS) position >>= 1;

do {
if (!memory[position].used && (memory[position].pos == mypos)) {
#if defined(SMP) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#else
blas_lock(&memory[position].lock);
#endif
if (!memory[position].used) goto allocation;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#else
blas_unlock(&memory[position].lock);
#endif
}

position ++;

} while (position < NUM_BUFFERS);


#endif #endif


position = 0; position = 0;


do { do {
#if defined(SMP) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#else
if (!memory[position].used) {
blas_lock(&memory[position].lock);
#endif
if (!memory[position].used) goto allocation; if (!memory[position].used) goto allocation;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#else
blas_unlock(&memory[position].lock);
}
#endif

position ++; position ++;


} while (position < NUM_BUFFERS);
} while (position < BUFFERS_PER_THREAD);


goto error; goto error;


@@ -1126,11 +1069,6 @@ void *blas_memory_alloc(int procpos){
#endif #endif


memory[position].used = 1; memory[position].used = 1;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#else
blas_unlock(&memory[position].lock);
#endif


if (!memory[position].addr) { if (!memory[position].addr) {
do { do {
@@ -1148,14 +1086,14 @@ void *blas_memory_alloc(int procpos){


#ifdef ALLOC_DEVICEDRIVER #ifdef ALLOC_DEVICEDRIVER
if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) { if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) {
fprintf(stderr, "OpenBLAS Warning ... Physically contigous allocation was failed.\n");
fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation failed.\n");
} }
#endif #endif


#ifdef ALLOC_HUGETLBFILE #ifdef ALLOC_HUGETLBFILE
if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) { if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) {
#ifndef OS_WINDOWS #ifndef OS_WINDOWS
fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n");
fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation failed.\n");
#endif #endif
} }
#endif #endif
@@ -1176,44 +1114,13 @@ void *blas_memory_alloc(int procpos){


} while ((BLASLONG)map_address == -1); } while ((BLASLONG)map_address == -1);


#if defined(SMP) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#endif
memory[position].addr = map_address; memory[position].addr = map_address;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#endif


#ifdef DEBUG #ifdef DEBUG
printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position); printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position);
#endif #endif
} }


#if defined(WHEREAMI) && !defined(USE_OPENMP)

if (memory[position].pos == -1) memory[position].pos = mypos;

#endif

#ifdef DYNAMIC_ARCH

if (memory_initialized == 1) {

LOCK_COMMAND(&alloc_lock);

if (memory_initialized == 1) {

if (!gotoblas) gotoblas_dynamic_init();

memory_initialized = 2;
}

UNLOCK_COMMAND(&alloc_lock);

}
#endif


#ifdef DEBUG #ifdef DEBUG
printf("Mapped : %p %3d\n\n", printf("Mapped : %p %3d\n\n",
(void *)memory[position].addr, position); (void *)memory[position].addr, position);
@@ -1222,7 +1129,7 @@ void *blas_memory_alloc(int procpos){
return (void *)memory[position].addr; return (void *)memory[position].addr;


error: error:
printf("BLAS : Program is Terminated. Because you tried to allocate too many memory regions.\n");
printf("OpenBLAS : Program will terminate because you tried to allocate too many memory regions.\n");


return NULL; return NULL;
} }
@@ -1236,10 +1143,7 @@ void blas_memory_free(void *free_area){
#endif #endif


position = 0; position = 0;
#if defined(SMP) && !defined(USE_OPENMP)
LOCK_COMMAND(&alloc_lock);
#endif
while ((position < NUM_BUFFERS) && (memory[position].addr != free_area))
while ((position < BUFFERS_PER_THREAD) && (memory[position].addr != free_area))
position++; position++;


if (memory[position].addr != free_area) goto error; if (memory[position].addr != free_area) goto error;
@@ -1248,13 +1152,7 @@ void blas_memory_free(void *free_area){
printf(" Position : %d\n", position); printf(" Position : %d\n", position);
#endif #endif


// arm: ensure all writes are finished before other thread takes this memory
WMB;

memory[position].used = 0; memory[position].used = 0;
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#endif


#ifdef DEBUG #ifdef DEBUG
printf("Unmap Succeeded.\n\n"); printf("Unmap Succeeded.\n\n");
@@ -1266,11 +1164,8 @@ void blas_memory_free(void *free_area){
printf("BLAS : Bad memory unallocation! : %4d %p\n", position, free_area); printf("BLAS : Bad memory unallocation! : %4d %p\n", position, free_area);


#ifdef DEBUG #ifdef DEBUG
for (position = 0; position < NUM_BUFFERS; position++)
for (position = 0; position < BUFFERS_PER_THREAD; position++)
printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used); printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used);
#endif
#if defined(SMP) && !defined(USE_OPENMP)
UNLOCK_COMMAND(&alloc_lock);
#endif #endif
return; return;
} }
@@ -1293,8 +1188,6 @@ void blas_shutdown(void){
BLASFUNC(blas_thread_shutdown)(); BLASFUNC(blas_thread_shutdown)();
#endif #endif


LOCK_COMMAND(&alloc_lock);

for (pos = 0; pos < release_pos; pos ++) { for (pos = 0; pos < release_pos; pos ++) {
release_info[pos].func(&release_info[pos]); release_info[pos].func(&release_info[pos]);
} }
@@ -1305,17 +1198,11 @@ void blas_shutdown(void){
base_address = BASE_ADDRESS; base_address = BASE_ADDRESS;
#endif #endif


for (pos = 0; pos < NUM_BUFFERS; pos ++){
for (pos = 0; pos < BUFFERS_PER_THREAD; pos ++){
memory[pos].addr = (void *)0; memory[pos].addr = (void *)0;
memory[pos].used = 0; memory[pos].used = 0;
#if defined(WHEREAMI) && !defined(USE_OPENMP)
memory[pos].pos = -1;
#endif
memory[pos].lock = 0;
} }


UNLOCK_COMMAND(&alloc_lock);

return; return;
} }




Loading…
Cancel
Save