Browse Source

Add thread throttling profile for SGEMV on `NEOVERSEV2`

tags/v0.3.30
Marek Michalowski 11 months ago
parent
commit
650a062e19
2 changed files with 17 additions and 0 deletions
  1. +1
    -0
      CONTRIBUTORS.md
  2. +16
    -0
      interface/gemv.c

+ 1
- 0
CONTRIBUTORS.md View File

@@ -240,6 +240,7 @@ In chronological order:
* Marek Michalowski <marek.michalowski@arm.com>
* [2025-01-21] Add thread throttling profile for SGEMV on `NEOVERSEV1`
* [2025-02-18] Add thread throttling profile for SGEMM on `NEOVERSEV2`
* [2025-02-19] Add thread throttling profile for SGEMV on `NEOVERSEV2`

* Ye Tao <ye.tao@arm.com>
* [2025-02-03] Optimize SBGEMM kernel on NEOVERSEV1

+ 16
- 0
interface/gemv.c View File

@@ -77,14 +77,30 @@ static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
}
#endif

#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV2)
/*
 * Thread throttling profile for GEMV on Neoverse V2.
 *
 * Picks a thread count from the size measure MN. Each band's cap is combined
 * with ncpu through MIN:
 *
 *   MN <   24964   -> 1 (single-threaded)
 *   MN <   65536   -> MIN(ncpu, 8)
 *   MN <  262144   -> MIN(ncpu, 32)
 *   MN < 1638400   -> MIN(ncpu, 64)
 *   otherwise      -> ncpu
 *
 * NOTE(review): MN is presumably the product of the matrix dimensions M and N;
 * confirm against the caller.
 */
static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) {
  /* Bands are tested from smallest to largest; the first match wins. */
  if (MN < 24964L) {
    return 1;
  }
  if (MN < 65536L) {
    return MIN(ncpu, 8);
  }
  if (MN < 262144L) {
    return MIN(ncpu, 32);
  }
  if (MN < 1638400L) {
    return MIN(ncpu, 64);
  }

  /* Largest problems: no throttling beyond the caller-supplied limit. */
  return ncpu;
}
#endif

static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
int ncpu = num_cpu_avail(3);
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu);
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
}
if (strcmp(gotoblas_corename(), "neoversev2") == 0) {
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu);
}
#endif

if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD )


Loading…
Cancel
Save