Browse Source

Refs #174. Fixed the buffer overflow bug in multithreaded hbmv and sbmv.

Instead of sharing thread 0's buffer, each thread now uses its own sb buffer.
This avoids overflowing thread 0's buffer.
tags/v0.2.6
Zhang Xianyi 13 years ago
parent
commit
5155e3f509
2 changed files with 7 additions and 4 deletions
  1. +6
    -4
      driver/level2/sbmv_thread.c
  2. +1
    -0
      driver/others/blas_server.c

+ 6
- 4
driver/level2/sbmv_thread.c View File

@@ -65,7 +65,6 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F

a = (FLOAT *)args -> a;
x = (FLOAT *)args -> b;
y = (FLOAT *)args -> c;

lda = args -> lda;
incx = args -> ldb;
@@ -76,6 +75,10 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
n_from = 0;
n_to = n;

// Use the first n * COMPSIZE elements of this thread's sb buffer as y
y = buffer;
buffer += ((COMPSIZE * n + 1023) & ~1023);

if (range_m) {
n_from = *(range_m + 0);
n_to = *(range_m + 1);
@@ -83,7 +86,6 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
a += n_from * lda * COMPSIZE;
}

if (range_n) y += *range_n * COMPSIZE;

if (incx != 1) {
COPY_K(n, x, incx, buffer, 1);
@@ -331,7 +333,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x

if (num_cpu) {
queue[0].sa = NULL;
queue[0].sb = buffer + num_cpu * (((n + 255) & ~255) + 16) * COMPSIZE;
queue[0].sb = buffer;
queue[num_cpu - 1].next = NULL;
exec_blas(num_cpu, queue);
@@ -344,7 +346,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
#else
ONE, ZERO,
#endif
buffer + range_n[i] * COMPSIZE, 1, buffer, 1, NULL, 0);
(FLOAT*)(queue[i].sb), 1, buffer, 1, NULL, 0);
}

AXPYU_K(n, 0, 0,


+ 1
- 0
driver/others/blas_server.c View File

@@ -385,6 +385,7 @@ static int blas_thread_server(void *arg){
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
}
}
queue->sb=sb;
}
#ifdef MONITOR


Loading…
Cancel
Save