Browse Source

updated some level1 functions that are not thread safe

tags/v0.2.20^2
Werner Saar 9 years ago
parent
commit
dd6212e684
4 changed files with 0 additions and 304 deletions
  1. +0
    -116
      interface/asum.c
  2. +0
    -50
      interface/copy.c
  3. +0
    -97
      interface/dot.c
  4. +0
    -41
      interface/rot.c

+ 0
- 116
interface/asum.c View File

@@ -42,24 +42,6 @@
#include "functable.h"
#endif

#ifdef SMP
/*
 * Worker handed to blas_level1_thread_with_return_value() for the
 * threaded absolute-sum routine.
 *
 * Runs ASUM_K over m elements of x (stride incx) and stores the result,
 * widened to double, in the location z points to.  The parameters
 * n, k, alpha, y, incy and incz are not read by this function.
 *
 * Always returns 0.
 */
static int asum_threads (BLASLONG m, BLASLONG n, BLASLONG k, float alpha,
float* x, BLASLONG incx, float* y, BLASLONG incy, float* z, BLASLONG incz)
{
	/* The result slot is written as a double in both build variants. */
	double *result_slot = (double *)z;

#ifndef CBLAS
	FLOATRET partial_sum = (FLOATRET)ASUM_K(m, x, incx);
#else
	FLOAT partial_sum = ASUM_K(m, x, incx);
#endif

	*result_slot = (double)partial_sum;

	return 0;
}
#endif

#ifndef CBLAS

FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX){
@@ -70,62 +52,14 @@ FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX){

PRINT_DEBUG_NAME;

#ifdef SMP
int i;
int mode, nthreads;
double mid_result= 0.0;
FLOAT dummyalpha[2] = {ZERO, ZERO};
double *buffer = (double*)blas_memory_alloc(0);
#endif

if (n <= 0) return 0;

IDEBUG_START;

FUNCTION_PROFILE_START();

#ifdef SMP
nthreads = num_cpu_avail(1);

//Temporary workaround for the low-performance issue with small input sizes &
//multithreading.
if (n <= 100000)
nthreads = 1;

if (nthreads == 1) {
#endif

ret = (FLOATRET)ASUM_K(n, x, incx);

#ifdef SMP
} else {

#ifndef DOUBLE
#ifndef COMPLEX
mode = BLAS_SINGLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
#else
#ifndef COMPLEX
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#endif
#endif

blas_level1_thread_with_return_value(mode, n, 0, 0, dummyalpha,
x, incx, NULL, 0, buffer, 0, (void *)asum_threads, nthreads);

for(i = 0; i < nthreads; i++)
mid_result += buffer[2*i];
ret = (FLOATRET)mid_result;
}

blas_memory_free(buffer);
#endif

FUNCTION_PROFILE_END(COMPSIZE, n, n);

IDEBUG_END;
@@ -141,68 +75,18 @@ FLOAT CNAME(blasint n, FLOAT *x, blasint incx){

PRINT_DEBUG_CNAME;

#ifdef SMP
int i;
int mode, nthreads;
double mid_result= 0.0;
FLOAT dummyalpha[2] = {ZERO, ZERO};

double *buffer = (double*)blas_memory_alloc(0);
#endif

if (n <= 0) return 0;

IDEBUG_START;

FUNCTION_PROFILE_START();

#ifdef SMP
nthreads = num_cpu_avail(1);

//Temporary workaround for the low-performance issue with small input sizes &
//multithreading.
if (n <= 100000)
nthreads = 1;

if (nthreads == 1) {
#endif

ret = ASUM_K(n, x, incx);

#ifdef SMP
} else {

#ifndef DOUBLE
#ifndef COMPLEX
mode = BLAS_SINGLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
#else
#ifndef COMPLEX
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#endif
#endif

blas_level1_thread_with_return_value(mode, n, 0, 0, dummyalpha,
x, incx, NULL, 0, buffer, 0, (void *)asum_threads, nthreads);

for(i = 0; i < nthreads; i++)
mid_result += buffer[2*i];
ret = (FLOAT)mid_result;
}

blas_memory_free(buffer);
#endif

FUNCTION_PROFILE_END(COMPSIZE, n, n);

IDEBUG_END;


return ret;
}



+ 0
- 50
interface/copy.c View File

@@ -42,17 +42,6 @@
#include "functable.h"
#endif

#ifdef SMP

/*
 * Worker handed to blas_level1_thread() for the threaded copy routine.
 *
 * Runs COPY_K on m elements, from x (stride incx) into y (stride incy).
 * Always returns 0.
 */
static int copy_threads (BLASLONG m, BLASLONG n, BLASLONG k, float alpha,
float* x, BLASLONG incx, float* y, BLASLONG incy, float* z, BLASLONG incz)
{
	/* These parameters are part of the worker signature but unused here. */
	(void)n;
	(void)k;
	(void)alpha;
	(void)z;
	(void)incz;

	COPY_K(m, x, incx, y, incy);

	return 0;
}

#endif

#ifndef CBLAS

void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){
@@ -71,11 +60,6 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){

#endif

#ifdef SMP
int mode, nthreads;
FLOAT dummyalpha[2] = {ZERO, ZERO};
#endif

if (n <= 0) return;

IDEBUG_START;
@@ -85,42 +69,8 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
if (incx < 0) x -= (n - 1) * incx * COMPSIZE;
if (incy < 0) y -= (n - 1) * incy * COMPSIZE;

#ifdef SMP
nthreads = num_cpu_avail(1);

//Temporary workaround for the low-performance issue with small input sizes &
//multithreading.
if (n <= 100000)
nthreads = 1;

if (nthreads == 1) {
#endif

COPY_K(n, x, incx, y, incy);

#ifdef SMP
} else {

#ifndef DOUBLE
#ifndef COMPLEX
mode = BLAS_SINGLE | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif
#else
#ifndef COMPLEX
mode = BLAS_DOUBLE | BLAS_REAL;
#else
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#endif
#endif

blas_level1_thread(mode, n, 0, 0, dummyalpha,
x, incx, y, incy, NULL, 0, (void *)copy_threads, nthreads);

}
#endif

FUNCTION_PROFILE_END(COMPSIZE, COMPSIZE * n, 0);

IDEBUG_END;


+ 0
- 97
interface/dot.c View File

@@ -42,24 +42,6 @@
#include "functable.h"
#endif

#ifdef SMP
/*
 * Worker handed to blas_level1_thread_with_return_value() for the
 * threaded dot product.
 *
 * Runs DOTU_K over m elements of x (stride incx) and y (stride incy) and
 * stores the result, widened to double, in the location z points to.
 * The parameters n, k, alpha and incz are not read by this function.
 *
 * Always returns 0.
 */
static int dot_threads (BLASLONG m, BLASLONG n, BLASLONG k, float alpha,
float* x, BLASLONG incx, float* y, BLASLONG incy, float* z, BLASLONG incz)
{
#ifndef CBLAS
	FLOATRET ret;
	ret = (FLOATRET)DOTU_K(m, x, incx, y, incy);
#else
	FLOAT ret;
	/*
	 * The length must be m, exactly as in the non-CBLAS branch.  The
	 * previous code passed n here; the visible callers supply 0 in the
	 * n position, so the CBLAS build would have computed a dot product
	 * of length 0.
	 */
	ret = DOTU_K(m, x, incx, y, incy);
#endif

	/* The result slot is written as a double in both build variants. */
	*((double *)z) = (double)ret;

	return 0;
}
#endif

#ifndef CBLAS

FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){
@@ -71,14 +53,6 @@ FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){

PRINT_DEBUG_NAME;

#ifdef SMP
int i;
int mode, nthreads;
double mid_result= 0.0;
FLOAT dummyalpha[2] = {ZERO, ZERO};
double *buffer = (double*)blas_memory_alloc(0);
#endif

if (n <= 0) return 0.;

IDEBUG_START;
@@ -88,40 +62,8 @@ FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){
if (incx < 0) x -= (n - 1) * incx;
if (incy < 0) y -= (n - 1) * incy;

#ifdef SMP
nthreads = num_cpu_avail(1);

//Temporary workaround for the low-performance issue with small input sizes &
//multithreading.
if (n <= 100000)
nthreads = 1;

if (nthreads == 1) {
#endif

ret = (FLOATRET)DOTU_K(n, x, incx, y, incy);

#ifdef SMP
} else {

#ifndef DOUBLE
mode = BLAS_SINGLE | BLAS_REAL;
#else
mode = BLAS_DOUBLE | BLAS_REAL;
#endif

blas_level1_thread_with_return_value(mode, n, 0, 0, dummyalpha,
x, incx, y, incy, buffer, 0, (void *)dot_threads, nthreads);

for(i = 0; i < nthreads; i++)
mid_result += buffer[2*i];
ret = (FLOATRET)mid_result;
}

blas_memory_free(buffer);
#endif

FUNCTION_PROFILE_END(1, 2 * n, 2 * n);

IDEBUG_END;
@@ -137,14 +79,6 @@ FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){

PRINT_DEBUG_CNAME;

#ifdef SMP
int i;
int mode, nthreads;
double mid_result= 0.0;
FLOAT dummyalpha[2] = {ZERO, ZERO};

double *buffer = (double*)blas_memory_alloc(0);
#endif
if (n <= 0) return 0.;

IDEBUG_START;
@@ -154,39 +88,8 @@ FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
if (incx < 0) x -= (n - 1) * incx;
if (incy < 0) y -= (n - 1) * incy;

#ifdef SMP
nthreads = num_cpu_avail(1);

//Temporary workaround for the low-performance issue with small input sizes &
//multithreading.
if (n <= 100000)
nthreads = 1;

if (nthreads == 1) {
#endif
ret = DOTU_K(n, x, incx, y, incy);

#ifdef SMP
} else {

#ifndef DOUBLE
mode = BLAS_SINGLE | BLAS_REAL;
#else
mode = BLAS_DOUBLE | BLAS_REAL;
#endif

blas_level1_thread_with_return_value(mode, n, 0, 0, dummyalpha,
x, incx, y, incy, buffer, 0, (void *)dot_threads, nthreads);

for(i = 0; i < nthreads; i++)
mid_result += buffer[2*i];
ret = (FLOAT)mid_result;
}

blas_memory_free(buffer);
#endif

FUNCTION_PROFILE_END(1, 2 * n, 2 * n);

IDEBUG_END;


+ 0
- 41
interface/rot.c View File

@@ -42,16 +42,6 @@
#include "functable.h"
#endif

#ifdef SMP
/*
 * Worker handed to blas_level1_thread() for the threaded plane rotation.
 *
 * Runs ROT_K on m elements of x (stride incx) and y (stride incy),
 * forwarding n and k in the two trailing argument positions (where the
 * single-threaded path passes c and s).  Always returns 0.
 *
 * NOTE(review): n and k are declared BLASLONG here, so the coefficients
 * arrive as integers; presumably any fractional part of c/s is lost on
 * the way in -- confirm against the blas_level1_thread() prototype.
 */
static int rot_threads (BLASLONG m, BLASLONG n, BLASLONG k, float alpha,
float* x, BLASLONG incx, float* y, BLASLONG incy, float* z, BLASLONG incz)
{
	/* These parameters are part of the worker signature but unused here. */
	(void)alpha;
	(void)z;
	(void)incz;

	ROT_K(m, x, incx, y, incy, n, k);

	return 0;
}

#endif

#ifndef CBLAS

void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY, FLOAT *C, FLOAT *S){
@@ -72,11 +62,6 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT c, F

#endif

#ifdef SMP
int mode, nthreads;
FLOAT dummyalpha[2] = {ZERO, ZERO};
#endif

if (n <= 0) return;

IDEBUG_START;
@@ -86,34 +71,8 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT c, F
if (incx < 0) x -= (n - 1) * incx;
if (incy < 0) y -= (n - 1) * incy;

#ifdef SMP
nthreads = num_cpu_avail(1);

//Temporary workaround for the low-performance issue with small input sizes &
//multithreading.
if (n <= 100000)
nthreads = 1;

if (nthreads == 1) {
#endif

ROT_K(n, x, incx, y, incy, c, s);

#ifdef SMP
} else {

#ifndef DOUBLE
mode = BLAS_SINGLE | BLAS_REAL;
#else
mode = BLAS_DOUBLE | BLAS_REAL;
#endif

blas_level1_thread(mode, n, c, s, dummyalpha,
x, incx, y, incy, NULL, 0, (void *)rot_threads, nthreads);

}
#endif

FUNCTION_PROFILE_END(1, n, n);

IDEBUG_END;


Loading…
Cancel
Save