From 5253c8f1658ab9688b871feb2d143fc6730cf98b Mon Sep 17 00:00:00 2001
From: Masato Nakagawa
Date: Mon, 30 Jun 2025 21:35:16 +0900
Subject: [PATCH] Multi-thread Performance Improvement of GEMM with DIVIDE_RATE=1 for A64FX.

---
 driver/level3/gemm.c | 4 ++++
 param.h              | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/driver/level3/gemm.c b/driver/level3/gemm.c
index 2b13da7d7..a20d6c59a 100644
--- a/driver/level3/gemm.c
+++ b/driver/level3/gemm.c
@@ -59,6 +59,10 @@
 #define GEMM_Q 128
 #endif
 
+#ifdef GEMM_DIVIDE_RATE
+#define DIVIDE_RATE GEMM_DIVIDE_RATE
+#endif
+
 #ifdef THREADED_LEVEL3
 #include "level3_thread.c"
 #else
diff --git a/param.h b/param.h
index 97a666b10..885a0b637 100644
--- a/param.h
+++ b/param.h
@@ -3701,6 +3701,8 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
 
 #elif defined(A64FX) // 512-bit SVE
 
+#define GEMM_DIVIDE_RATE 1
+
 #if defined(XDOUBLE) || defined(DOUBLE)
 #define GEMM_PREFERED_SIZE 8
 #else