Browse Source

Merge pull request #1491 from martin-frbg/ddot_mt

Add multithreading support for Haswell DDOT
tags/v0.3.0
Martin Kroeker GitHub 7 years ago
parent
commit
840e01061f
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 66 additions and 1 deletion
  1. +66
    -1
      kernel/x86_64/ddot.c

+ 66
- 1
kernel/x86_64/ddot.c View File

@@ -43,6 +43,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ddot_microk_sandy-2.c"
#endif

/*
 * RETURN_TYPE is the type through which the per-thread worker stores its
 * partial result: FLOAT by default, double when DSDOT is defined.
 */
#if !defined(DSDOT)
#define RETURN_TYPE FLOAT
#else
#define RETURN_TYPE double
#endif


#ifndef HAVE_KERNEL_8

@@ -71,7 +77,7 @@ static void ddot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d)

#endif

FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
static FLOAT dot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
@@ -139,4 +145,63 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)

}

#if defined(SMP)
/*
 * Worker callback passed to blas_level1_thread_with_return_value().
 *
 * Runs dot_compute() on the n elements of x and y it is given (with the
 * supplied increments) and stores the value through `result`.  The dummy0,
 * dummy1, dummy2 and dummy3 parameters are not used by this function.
 *
 * Always returns 0.
 */
static int dot_thread_function(BLASLONG n, BLASLONG dummy0,
		BLASLONG dummy1, FLOAT dummy2, FLOAT *x, BLASLONG inc_x, FLOAT *y,
		BLASLONG inc_y, RETURN_TYPE *result, BLASLONG dummy3)
{
	const FLOAT partial = dot_compute(n, x, inc_x, y, inc_y);

	/* result already has type RETURN_TYPE *, so no cast is required. */
	*result = partial;
	return 0;
}

/*
 * Threading helper defined outside this file.  CNAME below calls it with the
 * vector length as m, x/inc_x as a/lda, y/inc_y as b/ldb, a local result
 * buffer as c, and dot_thread_function as the callback.
 * NOTE(review): how the helper splits the work and assigns result slots is
 * not visible here -- confirm against its implementation.
 */
extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n,
BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int nthreads);
#endif

/*
 * Dot-product entry point.
 *
 * Without SMP this forwards directly to dot_compute().  With SMP, the work is
 * handed to blas_level1_thread_with_return_value() only when more than one
 * thread is available, n is greater than 10000 and neither increment is zero;
 * otherwise dot_compute() is called directly.
 *
 * In the threaded path each worker (dot_thread_function) stores its partial
 * result through a pointer into a local buffer; the partial results are read
 * back at a stride of sizeof(double) * 2 bytes and summed.
 *
 * Parameters:
 *   n      number of elements
 *   x, y   input vectors
 *   inc_x  increment between elements of x
 *   inc_y  increment between elements of y
 *
 * Returns the accumulated dot product as FLOAT.
 */
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{
#if defined(SMP)
	int nthreads;
	/*
	 * Only the address of this placeholder is passed on; give it a defined
	 * value so that nothing downstream can read an indeterminate object.
	 */
	FLOAT dummy_alpha = 0.0;
#endif
	FLOAT dot = 0.0;

#if defined(SMP)
	nthreads = num_cpu_avail(1);

	/* Zero increments and short vectors are handled single-threaded. */
	if (inc_x == 0 || inc_y == 0 || n <= 10000) {
		nthreads = 1;
	}

	if (nthreads == 1) {
		dot = dot_compute(n, x, inc_x, y, inc_y);
	} else {
		int mode, i;
		/*
		 * One slot of sizeof(double) * 2 bytes per thread.  Declared as
		 * double (rather than char) so the storage is suitably aligned
		 * for the RETURN_TYPE accesses made through it.
		 */
		double result[MAX_CPU_NUMBER * 2];
		RETURN_TYPE *ptr;

		/*
		 * Clear the slots that are summed below, so that a slot left
		 * unwritten (should the helper dispatch fewer than nthreads
		 * workers) contributes zero instead of an indeterminate value.
		 */
		for (i = 0; i < 2 * nthreads; i++) {
			result[i] = 0.0;
		}

#if !defined(DOUBLE)
		mode = BLAS_SINGLE | BLAS_REAL;
#else
		mode = BLAS_DOUBLE | BLAS_REAL;
#endif
		blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha,
				x, inc_x, y, inc_y, result, 0,
				( void *)dot_thread_function, nthreads);

		/* Sum the per-thread partial results, one per slot. */
		for (i = 0; i < nthreads; i++) {
			ptr = (RETURN_TYPE *)(((char *)result) + (size_t)i * sizeof(double) * 2);
			dot = dot + (*ptr);
		}
	}
#else
	dot = dot_compute(n, x, inc_x, y, inc_y);
#endif

	return dot;
}

Loading…
Cancel
Save