Route the BGEMM (bfloat16 GEMM) kernels through the runtime dispatch table.

  * common_b.h: the BGEMM_* copy/beta/kernel macros expand to the plain
    symbols when DYNAMIC_ARCH is not defined and to `gotoblas -> member`
    lookups when it is. The old "DYNAMIC_ARCH = 0 only" remark is replaced,
    since this patch makes it untrue.
  * common_param.h: when BUILD_BFLOAT16_ONLY == 1, the parameter struct gains
    the bgemm blocking/unroll integers, sbgemm_align_k and six bgemm function
    pointers, and SBGEMM_P/Q/R/UNROLL_* map to the bgemm_* members.
  * kernel/setparam-ref.c: adds the matching positional initializers. They
    are gated by `#if BUILD_BFLOAT16_ONLY == 1`, the same condition that
    declares the members, so the table cannot gain initializers for members
    that do not exist when the macro is defined to 0.

Formatting note: intra-line whitespace and blank context lines were rebuilt
from the hunk line counts, so apply with `git apply --ignore-whitespace`
(or `patch -l`) if the target files use different spacing. The `index` lines
are kept from the original patch and are informational only.

diff --git a/common_b.h b/common_b.h
index 744b082c2..4ac8f5cb3 100644
--- a/common_b.h
+++ b/common_b.h
@@ -30,7 +30,7 @@
 #define COMMON_B_H
 
-// for now, only support DYNAMIC_ARCH = 0 case.
-
+// Static builds bind the BGEMM kernels directly; DYNAMIC_ARCH builds dispatch through gotoblas.
+#ifndef DYNAMIC_ARCH
 #define BGEMM_ONCOPY bgemm_oncopy
 #define BGEMM_OTCOPY bgemm_otcopy
 #define BGEMM_INCOPY bgemm_incopy
@@ -39,6 +39,17 @@
 #define BGEMM_BETA bgemm_beta
 #define BGEMM_KERNEL bgemm_kernel
 
+#else
+
+#define BGEMM_ONCOPY gotoblas -> bgemm_oncopy
+#define BGEMM_OTCOPY gotoblas -> bgemm_otcopy
+#define BGEMM_INCOPY gotoblas -> bgemm_incopy
+#define BGEMM_ITCOPY gotoblas -> bgemm_itcopy
+#define BGEMM_BETA gotoblas -> bgemm_beta
+#define BGEMM_KERNEL gotoblas -> bgemm_kernel
+
+#endif
+
 #define BGEMM_NN bgemm_nn
 #define BGEMM_CN bgemm_tn
 #define BGEMM_TN bgemm_tn
diff --git a/common_param.h b/common_param.h
index 61480ba18..a527d35ed 100644
--- a/common_param.h
+++ b/common_param.h
@@ -49,6 +49,21 @@ typedef struct {
   int switch_ratio;
   int offsetA, offsetB, align;
 
+#if BUILD_BFLOAT16_ONLY == 1
+  int bgemm_p, bgemm_q, bgemm_r;
+  int bgemm_unroll_m, bgemm_unroll_n, bgemm_unroll_mn;
+  int sbgemm_align_k;
+
+  int (*bgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, bfloat16 *, bfloat16 *, BLASLONG);
+  int (*bgemm_beta )(BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
+
+  int (*bgemm_incopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
+  int (*bgemm_itcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
+  int (*bgemm_oncopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
+  int (*bgemm_otcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
+
+#endif
+
 #if BUILD_BFLOAT16 == 1
   int sbgemm_p, sbgemm_q, sbgemm_r;
   int sbgemm_unroll_m, sbgemm_unroll_n, sbgemm_unroll_mn;
@@ -1229,6 +1244,15 @@ extern gotoblas_t *gotoblas;
 
 #define HAVE_EX_L2 gotoblas -> exclusive_cache
 
+#if (BUILD_BFLOAT16_ONLY==1)
+#define SBGEMM_P gotoblas -> bgemm_p
+#define SBGEMM_Q gotoblas -> bgemm_q
+#define SBGEMM_R gotoblas -> bgemm_r
+#define SBGEMM_UNROLL_M gotoblas -> bgemm_unroll_m
+#define SBGEMM_UNROLL_N gotoblas -> bgemm_unroll_n
+#define SBGEMM_UNROLL_MN gotoblas -> bgemm_unroll_mn
+#endif
+
 #if (BUILD_BFLOAT16==1)
 #define SBGEMM_P gotoblas -> sbgemm_p
 #define SBGEMM_Q gotoblas -> sbgemm_q
diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c
index 5a5045ce2..78b59835f 100644
--- a/kernel/setparam-ref.c
+++ b/kernel/setparam-ref.c
@@ -1,6 +1,6 @@
 /*********************************************************************/
 /* Copyright 2009, 2010 The University of Texas at Austin. */
-/* Copyright 2023 The OpenBLAS Project. */
+/* Copyright 2023, 2025 The OpenBLAS Project. */
 /* All rights reserved. */
 /* */
 /* Redistribution and use in source and binary forms, with or */
@@ -56,6 +56,20 @@ gotoblas_t TABLE_NAME = {
 
   GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
 
+#if BUILD_BFLOAT16_ONLY == 1
+  0, 0, 0,
+  BGEMM_DEFAULT_UNROLL_M, BGEMM_DEFAULT_UNROLL_N,
+#ifdef BGEMM_DEFAULT_UNROLL_MN
+  BGEMM_DEFAULT_UNROLL_MN,
+#else
+  MAX(BGEMM_DEFAULT_UNROLL_M, BGEMM_DEFAULT_UNROLL_N),
+#endif
+  BGEMM_ALIGN_K,
+  bgemm_kernelTS, bgemm_betaTS,
+  bgemm_incopyTS, bgemm_itcopyTS,
+  bgemm_oncopyTS, bgemm_otcopyTS,
+#endif
+
 #ifdef BUILD_BFLOAT16
   0, 0, 0,
   SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,