Browse Source

Merge pull request #3493 from martin-frbg/casts+cleanup

WIP casts and cleanups
tags/v0.3.20
Martin Kroeker GitHub 4 years ago
parent
commit
7656aba00e
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 55 additions and 38 deletions
  1. +5
    -4
      cpuid_x86.c
  2. +28
    -12
      driver/others/blas_server.c
  3. +1
    -1
      interface/axpy.c
  4. +1
    -1
      interface/scal.c
  5. +2
    -2
      interface/zaxpy.c
  6. +1
    -1
      interface/zscal.c
  7. +1
    -1
      kernel/x86_64/casum.c
  8. +1
    -1
      kernel/x86_64/dasum.c
  9. +1
    -1
      kernel/x86_64/ddot.c
  10. +1
    -1
      kernel/x86_64/drot.c
  11. +1
    -1
      kernel/x86_64/sasum.c
  12. +1
    -1
      kernel/x86_64/srot.c
  13. +1
    -1
      kernel/x86_64/zasum.c
  14. +1
    -1
      kernel/x86_64/zdot.c
  15. +1
    -1
      lapack/getrf/getrf_parallel.c
  16. +2
    -2
      lapack/lauum/lauum_L_parallel.c
  17. +2
    -2
      lapack/lauum/lauum_U_parallel.c
  18. +2
    -2
      lapack/potrf/potrf_L_parallel.c
  19. +2
    -2
      lapack/potrf/potrf_U_parallel.c

+ 5
- 4
cpuid_x86.c View File

@@ -323,9 +323,11 @@ int get_vendor(void){

int get_cputype(int gettype){
int eax, ebx, ecx, edx;
/*
int extend_family, family;
int extend_model, model;
int type, stepping;
*/
int feature = 0;

cpuid(1, &eax, &ebx, &ecx, &edx);
@@ -428,7 +430,8 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
cpuid(0, &cpuid_level, &ebx, &ecx, &edx);

if (cpuid_level > 1) {
int numcalls =0 ;
int numcalls;
cpuid(2, &eax, &ebx, &ecx, &edx);
numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries
info[ 0] = BITMASK(eax, 8, 0xff);
@@ -1637,7 +1640,6 @@ int get_cpuname(void){
else
return CPUTYPE_BARCELONA;
}
break;
case 10: // Zen3
if(support_avx())
#ifndef NO_AVX2
@@ -2193,7 +2195,6 @@ int get_coretype(void){
else
return CORE_NEHALEM;
#endif
break;

case 7:
if (model == 10)
@@ -2582,4 +2583,4 @@ void get_sse(void){
if (features & HAVE_FMA3 ) printf("HAVE_FMA3=1\n");

}
//}
//}

+ 28
- 12
driver/others/blas_server.c View File

@@ -209,7 +209,8 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
/* REAL / Double */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double,
double *, BLASLONG, double *, BLASLONG,
double *, BLASLONG, void *) = func;
double *, BLASLONG, void *) = (void (*)(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG,
double *, BLASLONG, double *, BLASLONG, void *)) func;

afunc(args -> m, args -> n, args -> k,
((double *)args -> alpha)[0],
@@ -220,7 +221,10 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
/* REAL / Single */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, float *, BLASLONG,
float *, BLASLONG, void *) = func;
float *, BLASLONG, void *) = (void (*)
(BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, float *, BLASLONG,
float *, BLASLONG, void *)) func;

afunc(args -> m, args -> n, args -> k,
((float *)args -> alpha)[0],
@@ -232,7 +236,9 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
/* REAL / BFLOAT16 */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, bfloat16,
bfloat16 *, BLASLONG, bfloat16 *, BLASLONG,
bfloat16 *, BLASLONG, void *) = func;
bfloat16 *, BLASLONG, void *) = (void (*)(BLASLONG, BLASLONG, BLASLONG, bfloat16,
bfloat16 *, BLASLONG, bfloat16 *, BLASLONG,
bfloat16 *, BLASLONG, void *)) func;

afunc(args -> m, args -> n, args -> k,
((bfloat16 *)args -> alpha)[0],
@@ -243,7 +249,9 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
/* REAL / BLAS_STOBF16 */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, bfloat16 *, BLASLONG,
float *, BLASLONG, void *) = func;
float *, BLASLONG, void *) = (void (*)(BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, bfloat16 *, BLASLONG,
float *, BLASLONG, void *)) func;

afunc(args -> m, args -> n, args -> k,
((float *)args -> alpha)[0],
@@ -254,7 +262,9 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
/* REAL / BLAS_DTOBF16 */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double,
double *, BLASLONG, bfloat16 *, BLASLONG,
double *, BLASLONG, void *) = func;
double *, BLASLONG, void *) = (void (*)(BLASLONG, BLASLONG, BLASLONG, double,
double *, BLASLONG, bfloat16 *, BLASLONG,
double *, BLASLONG, void *)) func;

afunc(args -> m, args -> n, args -> k,
((double *)args -> alpha)[0],
@@ -271,7 +281,9 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
/* COMPLEX / Extended Double */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG,
xdouble *, BLASLONG, void *) = func;
xdouble *, BLASLONG, void *) = (void (*)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG,
xdouble *, BLASLONG, void *)) func;

afunc(args -> m, args -> n, args -> k,
((xdouble *)args -> alpha)[0],
@@ -285,7 +297,9 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
/* COMPLEX / Double */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double, double,
double *, BLASLONG, double *, BLASLONG,
double *, BLASLONG, void *) = func;
double *, BLASLONG, void *) = (void (*)(BLASLONG, BLASLONG, BLASLONG, double, double,
double *, BLASLONG, double *, BLASLONG,
double *, BLASLONG, void *)) func;

afunc(args -> m, args -> n, args -> k,
((double *)args -> alpha)[0],
@@ -297,7 +311,9 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
/* COMPLEX / Single */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float, float,
float *, BLASLONG, float *, BLASLONG,
float *, BLASLONG, void *) = func;
float *, BLASLONG, void *) = (void (*)(BLASLONG, BLASLONG, BLASLONG, float, float,
float *, BLASLONG, float *, BLASLONG,
float *, BLASLONG, void *)) func;

afunc(args -> m, args -> n, args -> k,
((float *)args -> alpha)[0],
@@ -425,7 +441,7 @@ blas_queue_t *tscq;
#endif

if (queue) {
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = (int (*)(blas_arg_t *, void *, void *, void *, void *, BLASLONG))queue -> routine;

atomic_store_queue(&thread_status[cpu].queue, (blas_queue_t *)1);

@@ -503,7 +519,7 @@ blas_queue_t *tscq;
legacy_exec(routine, queue -> mode, queue -> args, sb);
} else
if (queue -> mode & BLAS_PTHREAD) {
void (*pthreadcompat)(void *) = queue -> routine;
void (*pthreadcompat)(void *) = (void(*)(void*))queue -> routine;
(pthreadcompat)(queue -> args);
} else
(routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);
@@ -871,13 +887,13 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
fprintf(STDERR, "\n");
#endif

routine = queue -> routine;
routine = (int (*)(blas_arg_t *, void *, void *, double *, double *, BLASLONG))queue -> routine;

if (queue -> mode & BLAS_LEGACY) {
legacy_exec(routine, queue -> mode, queue -> args, queue -> sb);
} else
if (queue -> mode & BLAS_PTHREAD) {
void (*pthreadcompat)(void *) = queue -> routine;
void (*pthreadcompat)(void *) = (void (*)(void*))queue -> routine;
(pthreadcompat)(queue -> args);
} else
(routine)(queue -> args, queue -> range_m, queue -> range_n,


+ 1
- 1
interface/axpy.c View File

@@ -115,7 +115,7 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
#endif

blas_level1_thread(mode, n, 0, 0, &alpha,
x, incx, y, incy, NULL, 0, (void *)AXPYU_K, nthreads);
x, incx, y, incy, NULL, 0, (int (*)(void))AXPYU_K, nthreads);

}
#endif


+ 1
- 1
interface/scal.c View File

@@ -102,7 +102,7 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){
#else
&alpha,
#endif
x, incx, NULL, 0, NULL, 0, (void *)SCAL_K, nthreads);
x, incx, NULL, 0, NULL, 0, (int (*)(void))SCAL_K, nthreads);

}
#endif


+ 2
- 2
interface/zaxpy.c View File

@@ -128,9 +128,9 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in

blas_level1_thread(mode, n, 0, 0, ALPHA, x, incx, y, incy, NULL, 0,
#ifndef CONJ
(void *)AXPYU_K,
(int (*)(void))AXPYU_K,
#else
(void *)AXPYC_K,
(int (*)(void))AXPYC_K,
#endif
nthreads);
}


+ 1
- 1
interface/zscal.c View File

@@ -108,7 +108,7 @@ void CNAME(blasint n, FLOAT alpha_r, void *vx, blasint incx){
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif

blas_level1_thread(mode, n, 0, 0, alpha, x, incx, NULL, 0, NULL, 0, (void *)SCAL_K, nthreads);
blas_level1_thread(mode, n, 0, 0, alpha, x, incx, NULL, 0, NULL, 0, (int (*)(void))SCAL_K, nthreads);

}
#endif


+ 1
- 1
kernel/x86_64/casum.c View File

@@ -130,7 +130,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#endif
blas_level1_thread_with_return_value(mode, n, 0, 0, dummy_alpha, x, inc_x,
NULL, 0, result, 0, (void *)asum_thread_function, nthreads);
NULL, 0, result, 0, (int (*)(void))asum_thread_function, nthreads);
ptr = (FLOAT *)result;
for (i = 0; i < nthreads; i++) {
sumf += (*ptr);


+ 1
- 1
kernel/x86_64/dasum.c View File

@@ -114,7 +114,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
#else
mode = BLAS_DOUBLE | BLAS_REAL;
#endif
blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, NULL, 0, result, 0, (void *)asum_thread_function, nthreads);
blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, NULL, 0, result, 0, (int (*)(void))asum_thread_function, nthreads);
ptr = (FLOAT *)result;
for (i = 0; i < nthreads; i++) {
sumf += (*ptr);


+ 1
- 1
kernel/x86_64/ddot.c View File

@@ -190,7 +190,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#endif
blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha,
x, inc_x, y, inc_y, result, 0,
( void *)dot_thread_function, nthreads);
(int (*)(void)) dot_thread_function, nthreads);

ptr = (RETURN_TYPE *)result;
for (i = 0; i < nthreads; i++) {


+ 1
- 1
kernel/x86_64/drot.c View File

@@ -196,7 +196,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT
#else
int mode = BLAS_SINGLE | BLAS_REAL | BLAS_PTHREAD;
#endif
blas_level1_thread(mode, n, 0, 0, alpha, x, inc_x, y, inc_y, &dummy_c, 0, (void *)rot_thread_function, nthreads);
blas_level1_thread(mode, n, 0, 0, alpha, x, inc_x, y, inc_y, &dummy_c, 0, (int (*)(void))rot_thread_function, nthreads);
}
#else
rot_compute(n, x, inc_x, y, inc_y, c, s);


+ 1
- 1
kernel/x86_64/sasum.c View File

@@ -123,7 +123,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
#else
mode = BLAS_DOUBLE | BLAS_REAL;
#endif
blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, NULL, 0, result, 0, (void *)asum_thread_function, nthreads);
blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, NULL, 0, result, 0, (int (*)(void))asum_thread_function, nthreads);
ptr = (FLOAT *)result;
for (i = 0; i < nthreads; i++) {
sumf += (*ptr);


+ 1
- 1
kernel/x86_64/srot.c View File

@@ -198,7 +198,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT
#else
int mode = BLAS_SINGLE | BLAS_REAL | BLAS_PTHREAD;
#endif
blas_level1_thread(mode, n, 0, 0, alpha, x, inc_x, y, inc_y, &dummy_c, 0, (void *)rot_thread_function, nthreads);
blas_level1_thread(mode, n, 0, 0, alpha, x, inc_x, y, inc_y, &dummy_c, 0, (int (*)(void))rot_thread_function, nthreads);
}
#else
rot_compute(n, x, inc_x, y, inc_y, c, s);


+ 1
- 1
kernel/x86_64/zasum.c View File

@@ -130,7 +130,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
mode = BLAS_DOUBLE | BLAS_COMPLEX;
#endif
blas_level1_thread_with_return_value(mode, n, 0, 0, dummy_alpha, x, inc_x,
NULL, 0, result, 0, (void *)asum_thread_function, nthreads);
NULL, 0, result, 0, (int (*)(void))asum_thread_function, nthreads);
ptr = (FLOAT *)result;
for (i = 0; i < nthreads; i++) {
sumf += (*ptr);


+ 1
- 1
kernel/x86_64/zdot.c View File

@@ -215,7 +215,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA

blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha,
x, inc_x, y, inc_y, result, 0,
( void *)zdot_thread_function, nthreads);
(int (*)(void))zdot_thread_function, nthreads);

ptr = (OPENBLAS_COMPLEX_FLOAT *)result;
for (i = 0; i < nthreads; i++) {


+ 1
- 1
lapack/getrf/getrf_parallel.c View File

@@ -662,7 +662,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,

blas_level1_thread(mode, bk, is + bk + offset + 1, mn + offset, (void *)dummyalpha,
a + (- offset + is * lda) * COMPSIZE, lda, NULL, 0,
ipiv, 1, (void *)LASWP_PLUS, args -> nthreads);
ipiv, 1, (int (*)(void))LASWP_PLUS, args -> nthreads);

is += bk;
}


+ 2
- 2
lapack/lauum/lauum_L_parallel.c View File

@@ -102,7 +102,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.c = a;

syrk_thread(mode | BLAS_TRANSA_T | BLAS_TRANSB_N | BLAS_UPLO,
&newarg, NULL, NULL, (void *)HERK_LC, sa, sb, args -> nthreads);
&newarg, NULL, NULL, (int (*)(void))HERK_LC, sa, sb, args -> nthreads);

newarg.m = bk;
newarg.n = i;
@@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.b = a + (i ) * COMPSIZE;

gemm_thread_n(mode | BLAS_TRANSA_T,
&newarg, NULL, NULL, (void *)TRMM_LCLN, sa, sb, args -> nthreads);
&newarg, NULL, NULL, (int (*)(void))TRMM_LCLN, sa, sb, args -> nthreads);

newarg.m = bk;
newarg.n = bk;


+ 2
- 2
lapack/lauum/lauum_U_parallel.c View File

@@ -102,7 +102,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.c = a;

syrk_thread(mode | BLAS_TRANSA_N | BLAS_TRANSB_T,
&newarg, NULL, NULL, (void *)HERK_UN, sa, sb, args -> nthreads);
&newarg, NULL, NULL, (int (*)(void))HERK_UN, sa, sb, args -> nthreads);

newarg.m = i;
newarg.n = bk;
@@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.b = a + ( i * lda) * COMPSIZE;

gemm_thread_m(mode | BLAS_TRANSA_T | BLAS_RSIDE,
&newarg, NULL, NULL, (void *)TRMM_RCUN, sa, sb, args -> nthreads);
&newarg, NULL, NULL, (int (*)(void))TRMM_RCUN, sa, sb, args -> nthreads);

newarg.m = bk;
newarg.n = bk;


+ 2
- 2
lapack/potrf/potrf_L_parallel.c View File

@@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.b = a + (i + bk + i * lda) * COMPSIZE;

gemm_thread_m(mode | BLAS_RSIDE | BLAS_TRANSA_T | BLAS_UPLO,
&newarg, NULL, NULL, (void *)TRSM_RCLN, sa, sb, args -> nthreads);
&newarg, NULL, NULL, (int (*)(void))TRSM_RCLN, sa, sb, args -> nthreads);

newarg.n = n - i - bk;
newarg.k = bk;
@@ -121,7 +121,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
HERK_THREAD_LN(&newarg, NULL, NULL, sa, sb, 0);
#else
syrk_thread(mode | BLAS_TRANSA_N | BLAS_TRANSB_T | BLAS_UPLO,
&newarg, NULL, NULL, (void *)HERK_LN, sa, sb, args -> nthreads);
&newarg, NULL, NULL, (int (*)(void))HERK_LN, sa, sb, args -> nthreads);
#endif
}
}


+ 2
- 2
lapack/potrf/potrf_U_parallel.c View File

@@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.b = a + (i + (i + bk) * lda) * COMPSIZE;

gemm_thread_n(mode | BLAS_TRANSA_T,
&newarg, NULL, NULL, (void *)TRSM_LCUN, sa, sb, args -> nthreads);
&newarg, NULL, NULL, (int (*)(void))TRSM_LCUN, sa, sb, args -> nthreads);

newarg.n = n - i - bk;
newarg.k = bk;
@@ -121,7 +121,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
HERK_THREAD_UC(&newarg, NULL, NULL, sa, sb, 0);
#else
syrk_thread(mode | BLAS_TRANSA_N | BLAS_TRANSB_T,
&newarg, NULL, NULL, (void *)HERK_UC, sa, sb, args -> nthreads);
&newarg, NULL, NULL, (int (*)(void))HERK_UC, sa, sb, args -> nthreads);
#endif
}
}


Loading…
Cancel
Save