|
|
@@ -58,21 +58,19 @@ static FLOAT dasum_kernel(BLASLONG n, FLOAT *x1) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
#endif |
|
|
#endif |
|
|
|
|
|
|
|
|
/*
 * Single-threaded absolute-value sum.
 *
 * Adds ABS_K(x[k]) for n elements of x taken every inc_x entries and
 * returns the total. Returns 0.0 when n or inc_x is not positive.
 * A stride of 1 is handed to dasum_kernel(); any other positive stride
 * is summed by the scalar loop below.
 */
static FLOAT asum_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
	FLOAT acc = 0.0;
	BLASLONG pos;
	BLASLONG limit;

	/* Nothing to sum for empty input or a non-positive stride. */
	if (n <= 0 || inc_x <= 0) {
		return acc;
	}

	/* Contiguous data: delegate to the kernel. */
	if (inc_x == 1) {
		return dasum_kernel(n, x);
	}

	/* Strided data: walk indices 0, inc_x, 2*inc_x, ... below n*inc_x. */
	limit = n * inc_x;
	for (pos = 0; pos < limit; pos += inc_x) {
		acc += ABS_K(x[pos]);
	}

	return acc;
}
|
|
|
|
|
|
|
|
|
|
|
#if defined(SMP)
/*
 * Per-thread worker: runs asum_compute() on (n, x, inc_x) and stores the
 * partial sum in *result. Always returns 0.
 *
 * Only n, x, inc_x and result are used; the dummy* parameters are ignored.
 * NOTE(review): the long parameter list presumably mirrors the argument
 * layout the level-1 threading driver uses for its callbacks - confirm
 * against the driver.
 */
static int asum_thread_function(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy2, FLOAT *x, BLASLONG inc_x, FLOAT *dummy3, BLASLONG dummy4, FLOAT *result, BLASLONG dummy5)
{
	const FLOAT partial = asum_compute(n, x, inc_x);

	result[0] = partial;
	return 0;
}

/*
 * Threading driver used by CNAME(); declared here, defined elsewhere.
 * NOTE(review): presumably splits the m elements of `a` across `nthreads`
 * workers, calls `function` on each slice and has every worker leave its
 * return value in `c` - confirm against the driver implementation.
 */
extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k,
	void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc,
	int (*function)(), int nthreads);
#endif
|
|
|
|
|
|
|
|
|
|
|
/*
 * Public entry point: sum of absolute values of n elements of x with
 * stride inc_x.
 *
 * Without SMP this is a direct call to asum_compute(). With SMP, inputs of
 * more than 100000 elements (and a positive stride) are split across up to
 * one thread per 100000 elements, capped by num_cpu_avail(1); each worker
 * produces a partial sum and the partial sums are added here.
 *
 * Returns the sum; for n <= 0 or inc_x <= 0 asum_compute() returns 0.0.
 */
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
	FLOAT sumf = 0.0;

#if defined(SMP)
	int nthreads;
	int num_cpu = num_cpu_avail(1);

	/* Small or degenerate inputs are not worth the threading overhead. */
	if (n <= 100000 || inc_x <= 0) {
		nthreads = 1;
	} else {
		nthreads = num_cpu < n/100000 ? num_cpu : n/100000;
	}

	if (nthreads == 1) {
		sumf = asum_compute(n, x, inc_x);
	} else {
		int mode, i;
		/*
		 * One slot of 2 * sizeof(double) bytes per thread. Declared as
		 * double (same total size as the former char buffer) so that the
		 * FLOAT values stored in it are suitably aligned.
		 */
		double result[MAX_CPU_NUMBER * 2];
		/* Placeholder scalar for the driver; initialized so that no
		 * indeterminate value is ever handed over. */
		FLOAT dummy_alpha = 0.0;

#if !defined(DOUBLE)
		mode = BLAS_SINGLE | BLAS_REAL;
#else
		mode = BLAS_DOUBLE | BLAS_REAL;
#endif

		/*
		 * There is no second vector: pass a null pointer instead of an
		 * uninitialized one. The callback is cast function-pointer to
		 * function-pointer to match the declared int (*)() parameter.
		 */
		blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, (void *)0, 0, result, 0, (int (*)())asum_thread_function, nthreads);

		/* Each thread's partial sum sits at the start of its slot. */
		for (i = 0; i < nthreads; i++) {
			sumf += *(FLOAT *)&result[2 * i];
		}
	}
#else
	sumf = asum_compute(n, x, inc_x);
#endif

	return sumf;
}
|
|
|
|
|
|