From b6cb5ece5845512c1598aaca03831f8e6f63756a Mon Sep 17 00:00:00 2001 From: "shubham.chaudhari" Date: Fri, 28 Feb 2025 13:10:40 +0530 Subject: [PATCH 1/3] Add thread throttling profile for DGEMV on NEOVERSEV1 --- interface/gemv.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/interface/gemv.c b/interface/gemv.c index d03133946..360b82dcd 100644 --- a/interface/gemv.c +++ b/interface/gemv.c @@ -89,6 +89,24 @@ static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) { } #endif +//thread throttling for dgemv +#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1) +static inline int get_dgemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) { + + return + MN < 8100L ? 1 +: MN < 12100L ? MIN(ncpu, 2) +: MN < 36100L ? MIN(ncpu, 4) +: MN < 84100L ? MIN(ncpu, 8) +: MN < 348100L ? MIN(ncpu, 16) +: MN < 435600L ? MIN(ncpu, 24) +: MN < 810000L ? MIN(ncpu, 32) +: MN < 1050625 ? MIN(ncpu, 40) +: ncpu; + +} +#endif + static inline int get_gemv_optimal_nthreads(BLASLONG MN) { int ncpu = num_cpu_avail(3); #if defined(_WIN64) && defined(_M_ARM64) @@ -98,6 +116,8 @@ static inline int get_gemv_optimal_nthreads(BLASLONG MN) { #endif #if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); +#elif defined(NEOVERSEV1) && !defined(COMPLEX) && defined(DOUBLE) && !defined(BFLOAT16) + return get_dgemv_optimal_nthreads_neoversev1(MN, ncpu); #elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); #elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) From 189dbbc04ff6fb4d58168fd1aef11c21ed9d14c4 Mon Sep 17 00:00:00 2001 From: "shubham.chaudhari" Date: Tue, 4 Mar 2025 16:08:55 +0530 Subject: [PATCH 2/3] Add thread throttling for dynamic arch neoversev1 --- interface/gemv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/interface/gemv.c b/interface/gemv.c index 360b82dcd..22409649e 100644 --- a/interface/gemv.c +++ b/interface/gemv.c @@ -127,6 +127,12 @@ static inline int get_gemv_optimal_nthreads(BLASLONG MN) { if (strcmp(gotoblas_corename(), "neoversev2") == 0) { return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); } +#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && defined(DOUBLE) && !defined(BFLOAT16) + if (strcmp(gotoblas_corename(), "neoversev1") == 0) { + return get_dgemv_optimal_nthreads_neoversev1(MN, ncpu); + } + + #endif if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD ) From 8e289ecddc7f51913d3fafcb11957d780c7d3d7e Mon Sep 17 00:00:00 2001 From: "shubham.chaudhari" Date: Tue, 18 Mar 2025 13:24:05 +0530 Subject: [PATCH 3/3] Simplified thread throttling function in gemv --- interface/gemv.c | 51 +++++++++++++++++------------------------------- 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/interface/gemv.c b/interface/gemv.c index 22409649e..34b6addd3 100644 --- a/interface/gemv.c +++ b/interface/gemv.c @@ -70,11 +70,22 @@ static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT #if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1) static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) { - return - MN < 25600L ? 1 - : MN < 63001L ? MIN(ncpu, 4) - : MN < 459684L ? MIN(ncpu, 16) - : ncpu; + #ifdef DOUBLE + return (MN < 8100L) ? 1 + : (MN < 12100L) ? MIN(ncpu, 2) + : (MN < 36100L) ? MIN(ncpu, 4) + : (MN < 84100L) ? MIN(ncpu, 8) + : (MN < 348100L) ? MIN(ncpu, 16) + : (MN < 435600L) ? MIN(ncpu, 24) + : (MN < 810000L) ? MIN(ncpu, 32) + : (MN < 1050625L) ? MIN(ncpu, 40) + : ncpu; + #else + return (MN < 25600L) ? 1 + : (MN < 63001L) ? MIN(ncpu, 4) + : (MN < 459684L) ? MIN(ncpu, 16) + : ncpu; + #endif } #endif @@ -89,24 +100,6 @@ static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) { } #endif -//thread throttling for dgemv -#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1) -static inline int get_dgemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) { - - return - MN < 8100L ? 1 -: MN < 12100L ? MIN(ncpu, 2) -: MN < 36100L ? MIN(ncpu, 4) -: MN < 84100L ? MIN(ncpu, 8) -: MN < 348100L ? MIN(ncpu, 16) -: MN < 435600L ? MIN(ncpu, 24) -: MN < 810000L ? MIN(ncpu, 32) -: MN < 1050625 ? MIN(ncpu, 40) -: ncpu; - -} -#endif - static inline int get_gemv_optimal_nthreads(BLASLONG MN) { int ncpu = num_cpu_avail(3); #if defined(_WIN64) && defined(_M_ARM64) @@ -114,25 +107,17 @@ static inline int get_gemv_optimal_nthreads(BLASLONG MN) { return num_cpu_avail(4); return 1; #endif -#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) +#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(BFLOAT16) return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); -#elif defined(NEOVERSEV1) && !defined(COMPLEX) && defined(DOUBLE) && !defined(BFLOAT16) - return get_dgemv_optimal_nthreads_neoversev1(MN, ncpu); #elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); -#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) +#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(BFLOAT16) if (strcmp(gotoblas_corename(), "neoversev1") == 0) { return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); } if (strcmp(gotoblas_corename(), "neoversev2") == 0) { return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); } -#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && defined(DOUBLE) && !defined(BFLOAT16) - if (strcmp(gotoblas_corename(), "neoversev1") == 0) { - return get_dgemv_optimal_nthreads_neoversev1(MN, ncpu); - } - - #endif if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD )