WIP casts and cleanups (tags/v0.3.20)
| @@ -323,9 +323,11 @@ int get_vendor(void){ | |||
| int get_cputype(int gettype){ | |||
| int eax, ebx, ecx, edx; | |||
| /* | |||
| int extend_family, family; | |||
| int extend_model, model; | |||
| int type, stepping; | |||
| */ | |||
| int feature = 0; | |||
| cpuid(1, &eax, &ebx, &ecx, &edx); | |||
| @@ -428,7 +430,8 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ | |||
| cpuid(0, &cpuid_level, &ebx, &ecx, &edx); | |||
| if (cpuid_level > 1) { | |||
| int numcalls =0 ; | |||
| int numcalls; | |||
| cpuid(2, &eax, &ebx, &ecx, &edx); | |||
| numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries | |||
| info[ 0] = BITMASK(eax, 8, 0xff); | |||
| @@ -1637,7 +1640,6 @@ int get_cpuname(void){ | |||
| else | |||
| return CPUTYPE_BARCELONA; | |||
| } | |||
| break; | |||
| case 10: // Zen3 | |||
| if(support_avx()) | |||
| #ifndef NO_AVX2 | |||
| @@ -2193,7 +2195,6 @@ int get_coretype(void){ | |||
| else | |||
| return CORE_NEHALEM; | |||
| #endif | |||
| break; | |||
| case 7: | |||
| if (model == 10) | |||
| @@ -2582,4 +2583,4 @@ void get_sse(void){ | |||
| if (features & HAVE_FMA3 ) printf("HAVE_FMA3=1\n"); | |||
| } | |||
| //} | |||
| //} | |||
| @@ -209,7 +209,8 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ | |||
| /* REAL / Double */ | |||
| void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double, | |||
| double *, BLASLONG, double *, BLASLONG, | |||
| double *, BLASLONG, void *) = func; | |||
| double *, BLASLONG, void *) = (void (*)(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, | |||
| double *, BLASLONG, double *, BLASLONG, void *)) func; | |||
| afunc(args -> m, args -> n, args -> k, | |||
| ((double *)args -> alpha)[0], | |||
| @@ -220,7 +221,10 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ | |||
| /* REAL / Single */ | |||
| void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float, | |||
| float *, BLASLONG, float *, BLASLONG, | |||
| float *, BLASLONG, void *) = func; | |||
| float *, BLASLONG, void *) = (void (*) | |||
| (BLASLONG, BLASLONG, BLASLONG, float, | |||
| float *, BLASLONG, float *, BLASLONG, | |||
| float *, BLASLONG, void *)) func; | |||
| afunc(args -> m, args -> n, args -> k, | |||
| ((float *)args -> alpha)[0], | |||
| @@ -232,7 +236,9 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ | |||
| /* REAL / BFLOAT16 */ | |||
| void (*afunc)(BLASLONG, BLASLONG, BLASLONG, bfloat16, | |||
| bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, | |||
| bfloat16 *, BLASLONG, void *) = func; | |||
| bfloat16 *, BLASLONG, void *) = (void (*)(BLASLONG, BLASLONG, BLASLONG, bfloat16, | |||
| bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, | |||
| bfloat16 *, BLASLONG, void *)) func; | |||
| afunc(args -> m, args -> n, args -> k, | |||
| ((bfloat16 *)args -> alpha)[0], | |||
| @@ -243,7 +249,9 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ | |||
| /* REAL / BLAS_STOBF16 */ | |||
| void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float, | |||
| float *, BLASLONG, bfloat16 *, BLASLONG, | |||
| float *, BLASLONG, void *) = func; | |||
| float *, BLASLONG, void *) = (void (*)(BLASLONG, BLASLONG, BLASLONG, float, | |||
| float *, BLASLONG, bfloat16 *, BLASLONG, | |||
| float *, BLASLONG, void *)) func; | |||
| afunc(args -> m, args -> n, args -> k, | |||
| ((float *)args -> alpha)[0], | |||
| @@ -254,7 +262,9 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ | |||
| /* REAL / BLAS_DTOBF16 */ | |||
| void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double, | |||
| double *, BLASLONG, bfloat16 *, BLASLONG, | |||
| double *, BLASLONG, void *) = func; | |||
| double *, BLASLONG, void *) = (void (*)(BLASLONG, BLASLONG, BLASLONG, double, | |||
| double *, BLASLONG, bfloat16 *, BLASLONG, | |||
| double *, BLASLONG, void *)) func; | |||
| afunc(args -> m, args -> n, args -> k, | |||
| ((double *)args -> alpha)[0], | |||
| @@ -271,7 +281,9 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ | |||
| /* COMPLEX / Extended Double */ | |||
| void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, | |||
| xdouble *, BLASLONG, xdouble *, BLASLONG, | |||
| xdouble *, BLASLONG, void *) = func; | |||
| xdouble *, BLASLONG, void *) = (void (*)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, | |||
| xdouble *, BLASLONG, xdouble *, BLASLONG, | |||
| xdouble *, BLASLONG, void *)) func; | |||
| afunc(args -> m, args -> n, args -> k, | |||
| ((xdouble *)args -> alpha)[0], | |||
| @@ -285,7 +297,9 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ | |||
| /* COMPLEX / Double */ | |||
| void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double, double, | |||
| double *, BLASLONG, double *, BLASLONG, | |||
| double *, BLASLONG, void *) = func; | |||
| double *, BLASLONG, void *) = (void (*)(BLASLONG, BLASLONG, BLASLONG, double, double, | |||
| double *, BLASLONG, double *, BLASLONG, | |||
| double *, BLASLONG, void *)) func; | |||
| afunc(args -> m, args -> n, args -> k, | |||
| ((double *)args -> alpha)[0], | |||
| @@ -297,7 +311,9 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ | |||
| /* COMPLEX / Single */ | |||
| void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float, float, | |||
| float *, BLASLONG, float *, BLASLONG, | |||
| float *, BLASLONG, void *) = func; | |||
| float *, BLASLONG, void *) = (void (*)(BLASLONG, BLASLONG, BLASLONG, float, float, | |||
| float *, BLASLONG, float *, BLASLONG, | |||
| float *, BLASLONG, void *)) func; | |||
| afunc(args -> m, args -> n, args -> k, | |||
| ((float *)args -> alpha)[0], | |||
| @@ -425,7 +441,7 @@ blas_queue_t *tscq; | |||
| #endif | |||
| if (queue) { | |||
| int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine; | |||
| int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = (int (*)(blas_arg_t *, void *, void *, void *, void *, BLASLONG))queue -> routine; | |||
| atomic_store_queue(&thread_status[cpu].queue, (blas_queue_t *)1); | |||
| @@ -503,7 +519,7 @@ blas_queue_t *tscq; | |||
| legacy_exec(routine, queue -> mode, queue -> args, sb); | |||
| } else | |||
| if (queue -> mode & BLAS_PTHREAD) { | |||
| void (*pthreadcompat)(void *) = queue -> routine; | |||
| void (*pthreadcompat)(void *) = (void(*)(void*))queue -> routine; | |||
| (pthreadcompat)(queue -> args); | |||
| } else | |||
| (routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position); | |||
| @@ -871,13 +887,13 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){ | |||
| fprintf(STDERR, "\n"); | |||
| #endif | |||
| routine = queue -> routine; | |||
| routine = (int (*)(blas_arg_t *, void *, void *, double *, double *, BLASLONG))queue -> routine; | |||
| if (queue -> mode & BLAS_LEGACY) { | |||
| legacy_exec(routine, queue -> mode, queue -> args, queue -> sb); | |||
| } else | |||
| if (queue -> mode & BLAS_PTHREAD) { | |||
| void (*pthreadcompat)(void *) = queue -> routine; | |||
| void (*pthreadcompat)(void *) = (void (*)(void*))queue -> routine; | |||
| (pthreadcompat)(queue -> args); | |||
| } else | |||
| (routine)(queue -> args, queue -> range_m, queue -> range_n, | |||
| @@ -115,7 +115,7 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc | |||
| #endif | |||
| blas_level1_thread(mode, n, 0, 0, &alpha, | |||
| x, incx, y, incy, NULL, 0, (void *)AXPYU_K, nthreads); | |||
| x, incx, y, incy, NULL, 0, (int (*)(void))AXPYU_K, nthreads); | |||
| } | |||
| #endif | |||
| @@ -102,7 +102,7 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){ | |||
| #else | |||
| &alpha, | |||
| #endif | |||
| x, incx, NULL, 0, NULL, 0, (void *)SCAL_K, nthreads); | |||
| x, incx, NULL, 0, NULL, 0, (int (*)(void))SCAL_K, nthreads); | |||
| } | |||
| #endif | |||
| @@ -128,9 +128,9 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in | |||
| blas_level1_thread(mode, n, 0, 0, ALPHA, x, incx, y, incy, NULL, 0, | |||
| #ifndef CONJ | |||
| (void *)AXPYU_K, | |||
| (int (*)(void))AXPYU_K, | |||
| #else | |||
| (void *)AXPYC_K, | |||
| (int (*)(void))AXPYC_K, | |||
| #endif | |||
| nthreads); | |||
| } | |||
| @@ -108,7 +108,7 @@ void CNAME(blasint n, FLOAT alpha_r, void *vx, blasint incx){ | |||
| mode = BLAS_SINGLE | BLAS_COMPLEX; | |||
| #endif | |||
| blas_level1_thread(mode, n, 0, 0, alpha, x, incx, NULL, 0, NULL, 0, (void *)SCAL_K, nthreads); | |||
| blas_level1_thread(mode, n, 0, 0, alpha, x, incx, NULL, 0, NULL, 0, (int (*)(void))SCAL_K, nthreads); | |||
| } | |||
| #endif | |||
| @@ -130,7 +130,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| mode = BLAS_DOUBLE | BLAS_COMPLEX; | |||
| #endif | |||
| blas_level1_thread_with_return_value(mode, n, 0, 0, dummy_alpha, x, inc_x, | |||
| NULL, 0, result, 0, (void *)asum_thread_function, nthreads); | |||
| NULL, 0, result, 0, (int (*)(void))asum_thread_function, nthreads); | |||
| ptr = (FLOAT *)result; | |||
| for (i = 0; i < nthreads; i++) { | |||
| sumf += (*ptr); | |||
| @@ -114,7 +114,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| #else | |||
| mode = BLAS_DOUBLE | BLAS_REAL; | |||
| #endif | |||
| blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, NULL, 0, result, 0, (void *)asum_thread_function, nthreads); | |||
| blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, NULL, 0, result, 0, (int (*)(void))asum_thread_function, nthreads); | |||
| ptr = (FLOAT *)result; | |||
| for (i = 0; i < nthreads; i++) { | |||
| sumf += (*ptr); | |||
| @@ -190,7 +190,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| #endif | |||
| blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, | |||
| x, inc_x, y, inc_y, result, 0, | |||
| ( void *)dot_thread_function, nthreads); | |||
| (int (*)(void)) dot_thread_function, nthreads); | |||
| ptr = (RETURN_TYPE *)result; | |||
| for (i = 0; i < nthreads; i++) { | |||
| @@ -196,7 +196,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT | |||
| #else | |||
| int mode = BLAS_SINGLE | BLAS_REAL | BLAS_PTHREAD; | |||
| #endif | |||
| blas_level1_thread(mode, n, 0, 0, alpha, x, inc_x, y, inc_y, &dummy_c, 0, (void *)rot_thread_function, nthreads); | |||
| blas_level1_thread(mode, n, 0, 0, alpha, x, inc_x, y, inc_y, &dummy_c, 0, (int (*)(void))rot_thread_function, nthreads); | |||
| } | |||
| #else | |||
| rot_compute(n, x, inc_x, y, inc_y, c, s); | |||
| @@ -123,7 +123,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| #else | |||
| mode = BLAS_DOUBLE | BLAS_REAL; | |||
| #endif | |||
| blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, NULL, 0, result, 0, (void *)asum_thread_function, nthreads); | |||
| blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, NULL, 0, result, 0, (int (*)(void))asum_thread_function, nthreads); | |||
| ptr = (FLOAT *)result; | |||
| for (i = 0; i < nthreads; i++) { | |||
| sumf += (*ptr); | |||
| @@ -198,7 +198,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT | |||
| #else | |||
| int mode = BLAS_SINGLE | BLAS_REAL | BLAS_PTHREAD; | |||
| #endif | |||
| blas_level1_thread(mode, n, 0, 0, alpha, x, inc_x, y, inc_y, &dummy_c, 0, (void *)rot_thread_function, nthreads); | |||
| blas_level1_thread(mode, n, 0, 0, alpha, x, inc_x, y, inc_y, &dummy_c, 0, (int (*)(void))rot_thread_function, nthreads); | |||
| } | |||
| #else | |||
| rot_compute(n, x, inc_x, y, inc_y, c, s); | |||
| @@ -130,7 +130,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| mode = BLAS_DOUBLE | BLAS_COMPLEX; | |||
| #endif | |||
| blas_level1_thread_with_return_value(mode, n, 0, 0, dummy_alpha, x, inc_x, | |||
| NULL, 0, result, 0, (void *)asum_thread_function, nthreads); | |||
| NULL, 0, result, 0, (int (*)(void))asum_thread_function, nthreads); | |||
| ptr = (FLOAT *)result; | |||
| for (i = 0; i < nthreads; i++) { | |||
| sumf += (*ptr); | |||
| @@ -215,7 +215,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA | |||
| blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, | |||
| x, inc_x, y, inc_y, result, 0, | |||
| ( void *)zdot_thread_function, nthreads); | |||
| (int (*)(void))zdot_thread_function, nthreads); | |||
| ptr = (OPENBLAS_COMPLEX_FLOAT *)result; | |||
| for (i = 0; i < nthreads; i++) { | |||
| @@ -662,7 +662,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||
| blas_level1_thread(mode, bk, is + bk + offset + 1, mn + offset, (void *)dummyalpha, | |||
| a + (- offset + is * lda) * COMPSIZE, lda, NULL, 0, | |||
| ipiv, 1, (void *)LASWP_PLUS, args -> nthreads); | |||
| ipiv, 1, (int (*)(void))LASWP_PLUS, args -> nthreads); | |||
| is += bk; | |||
| } | |||
| @@ -102,7 +102,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||
| newarg.c = a; | |||
| syrk_thread(mode | BLAS_TRANSA_T | BLAS_TRANSB_N | BLAS_UPLO, | |||
| &newarg, NULL, NULL, (void *)HERK_LC, sa, sb, args -> nthreads); | |||
| &newarg, NULL, NULL, (int (*)(void))HERK_LC, sa, sb, args -> nthreads); | |||
| newarg.m = bk; | |||
| newarg.n = i; | |||
| @@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||
| newarg.b = a + (i ) * COMPSIZE; | |||
| gemm_thread_n(mode | BLAS_TRANSA_T, | |||
| &newarg, NULL, NULL, (void *)TRMM_LCLN, sa, sb, args -> nthreads); | |||
| &newarg, NULL, NULL, (int (*)(void))TRMM_LCLN, sa, sb, args -> nthreads); | |||
| newarg.m = bk; | |||
| newarg.n = bk; | |||
| @@ -102,7 +102,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||
| newarg.c = a; | |||
| syrk_thread(mode | BLAS_TRANSA_N | BLAS_TRANSB_T, | |||
| &newarg, NULL, NULL, (void *)HERK_UN, sa, sb, args -> nthreads); | |||
| &newarg, NULL, NULL, (int (*)(void))HERK_UN, sa, sb, args -> nthreads); | |||
| newarg.m = i; | |||
| newarg.n = bk; | |||
| @@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||
| newarg.b = a + ( i * lda) * COMPSIZE; | |||
| gemm_thread_m(mode | BLAS_TRANSA_T | BLAS_RSIDE, | |||
| &newarg, NULL, NULL, (void *)TRMM_RCUN, sa, sb, args -> nthreads); | |||
| &newarg, NULL, NULL, (int (*)(void))TRMM_RCUN, sa, sb, args -> nthreads); | |||
| newarg.m = bk; | |||
| newarg.n = bk; | |||
| @@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||
| newarg.b = a + (i + bk + i * lda) * COMPSIZE; | |||
| gemm_thread_m(mode | BLAS_RSIDE | BLAS_TRANSA_T | BLAS_UPLO, | |||
| &newarg, NULL, NULL, (void *)TRSM_RCLN, sa, sb, args -> nthreads); | |||
| &newarg, NULL, NULL, (int (*)(void))TRSM_RCLN, sa, sb, args -> nthreads); | |||
| newarg.n = n - i - bk; | |||
| newarg.k = bk; | |||
| @@ -121,7 +121,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||
| HERK_THREAD_LN(&newarg, NULL, NULL, sa, sb, 0); | |||
| #else | |||
| syrk_thread(mode | BLAS_TRANSA_N | BLAS_TRANSB_T | BLAS_UPLO, | |||
| &newarg, NULL, NULL, (void *)HERK_LN, sa, sb, args -> nthreads); | |||
| &newarg, NULL, NULL, (int (*)(void))HERK_LN, sa, sb, args -> nthreads); | |||
| #endif | |||
| } | |||
| } | |||
| @@ -110,7 +110,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||
| newarg.b = a + (i + (i + bk) * lda) * COMPSIZE; | |||
| gemm_thread_n(mode | BLAS_TRANSA_T, | |||
| &newarg, NULL, NULL, (void *)TRSM_LCUN, sa, sb, args -> nthreads); | |||
| &newarg, NULL, NULL, (int (*)(void))TRSM_LCUN, sa, sb, args -> nthreads); | |||
| newarg.n = n - i - bk; | |||
| newarg.k = bk; | |||
| @@ -121,7 +121,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||
| HERK_THREAD_UC(&newarg, NULL, NULL, sa, sb, 0); | |||
| #else | |||
| syrk_thread(mode | BLAS_TRANSA_N | BLAS_TRANSB_T, | |||
| &newarg, NULL, NULL, (void *)HERK_UC, sa, sb, args -> nthreads); | |||
| &newarg, NULL, NULL, (int (*)(void))HERK_UC, sa, sb, args -> nthreads); | |||
| #endif | |||
| } | |||
| } | |||