From 1eb0815b098b5ff181d037dd845fe714d0aaa0ca Mon Sep 17 00:00:00 2001 From: Ye Tao Date: Wed, 21 May 2025 11:01:42 +0000 Subject: [PATCH] support multithreaded bgemm interface --- common_b.h | 17 ++++++++++++++++ common_level3.h | 5 +++++ common_macro.h | 17 ++++++++++++++++ driver/level3/Makefile | 45 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+) diff --git a/common_b.h b/common_b.h index 103b1fb41..744b082c2 100644 --- a/common_b.h +++ b/common_b.h @@ -55,4 +55,21 @@ #define BGEMM_RT bgemm_nt #define BGEMM_RC bgemm_nt #define BGEMM_RR bgemm_nn + +#define BGEMM_THREAD_NN bgemm_thread_nn +#define BGEMM_THREAD_CN bgemm_thread_tn +#define BGEMM_THREAD_TN bgemm_thread_tn +#define BGEMM_THREAD_NC bgemm_thread_nt +#define BGEMM_THREAD_NT bgemm_thread_nt +#define BGEMM_THREAD_CC bgemm_thread_tt +#define BGEMM_THREAD_CT bgemm_thread_tt +#define BGEMM_THREAD_TC bgemm_thread_tt +#define BGEMM_THREAD_TT bgemm_thread_tt +#define BGEMM_THREAD_NR bgemm_thread_nn +#define BGEMM_THREAD_TR bgemm_thread_tn +#define BGEMM_THREAD_CR bgemm_thread_tn +#define BGEMM_THREAD_RN bgemm_thread_nn +#define BGEMM_THREAD_RT bgemm_thread_nt +#define BGEMM_THREAD_RC bgemm_thread_nt +#define BGEMM_THREAD_RR bgemm_thread_nn #endif \ No newline at end of file diff --git a/common_level3.h b/common_level3.h index 1a253d050..1cd088821 100644 --- a/common_level3.h +++ b/common_level3.h @@ -768,6 +768,11 @@ int xgemm_cr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLON int xgemm_cc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG); #endif +int bgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG); +int bgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG); +int bgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG); +int bgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG); + int 
sbgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG); int sbgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG); int sbgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG); diff --git a/common_macro.h b/common_macro.h index bb3191d89..48e78564d 100644 --- a/common_macro.h +++ b/common_macro.h @@ -687,6 +687,23 @@ #define GEMM_INCOPY BGEMM_INCOPY #define GEMM_ITCOPY BGEMM_ITCOPY +#define GEMM_THREAD_NN BGEMM_THREAD_NN +#define GEMM_THREAD_CN BGEMM_THREAD_TN +#define GEMM_THREAD_TN BGEMM_THREAD_TN +#define GEMM_THREAD_NC BGEMM_THREAD_NT +#define GEMM_THREAD_NT BGEMM_THREAD_NT +#define GEMM_THREAD_CC BGEMM_THREAD_TT +#define GEMM_THREAD_CT BGEMM_THREAD_TT +#define GEMM_THREAD_TC BGEMM_THREAD_TT +#define GEMM_THREAD_TT BGEMM_THREAD_TT +#define GEMM_THREAD_NR BGEMM_THREAD_NN +#define GEMM_THREAD_TR BGEMM_THREAD_TN +#define GEMM_THREAD_CR BGEMM_THREAD_TN +#define GEMM_THREAD_RN BGEMM_THREAD_NN +#define GEMM_THREAD_RT BGEMM_THREAD_NT +#define GEMM_THREAD_RC BGEMM_THREAD_NT +#define GEMM_THREAD_RR BGEMM_THREAD_NN + #elif defined(BFLOAT16) #define D_TO_BF16_K SBDTOBF16_K #define D_BF16_TO_K DBF16TOD_K diff --git a/driver/level3/Makefile b/driver/level3/Makefile index c30483842..bd8351013 100644 --- a/driver/level3/Makefile +++ b/driver/level3/Makefile @@ -1,3 +1,32 @@ +############################################################################### +# Copyright (c) 2025, The OpenBLAS Project +# All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# 3. Neither the name of the OpenBLAS project nor the names of +# its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +############################################################################### + TOPDIR = ../.. 
include ../../Makefile.system @@ -207,6 +236,10 @@ COMMONOBJS += gemm_thread_m.$(SUFFIX) gemm_thread_n.$(SUFFIX) gemm_thread_mn.$( COMMONOBJS += syrk_thread.$(SUFFIX) ifneq ($(USE_SIMPLE_THREADED_LEVEL3), 1) +ifeq ($(BUILD_BFLOAT16_ONLY), 1) +BBLASOBJS += bgemm_thread_nn.$(SUFFIX) bgemm_thread_nt.$(SUFFIX) bgemm_thread_tn.$(SUFFIX) bgemm_thread_tt.$(SUFFIX) +endif + ifeq ($(BUILD_BFLOAT16),1) SBBLASOBJS += sbgemm_thread_nn.$(SUFFIX) sbgemm_thread_nt.$(SUFFIX) sbgemm_thread_tn.$(SUFFIX) sbgemm_thread_tt.$(SUFFIX) endif @@ -550,6 +583,18 @@ gemm_thread_variable.$(SUFFIX) : gemm_thread_variable.c ../../common.h beta_thread.$(SUFFIX) : beta_thread.c ../../common.h $(CC) -c $(CFLAGS) $< -o $(@F) +bgemm_thread_nn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h + $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DNN $< -o $(@F) + +bgemm_thread_nt.$(SUFFIX) : gemm.c level3_thread.c ../../param.h + $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DNT $< -o $(@F) + +bgemm_thread_tn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h + $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DTN $< -o $(@F) + +bgemm_thread_tt.$(SUFFIX) : gemm.c level3_thread.c ../../param.h + $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F) + sbgemm_thread_nn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)