| @@ -52,7 +52,7 @@ typedef struct { | |||
| #if BUILD_BFLOAT16_ONLY == 1 | |||
| int bgemm_p, bgemm_q, bgemm_r; | |||
| int bgemm_unroll_m, bgemm_unroll_n, bgemm_unroll_mn; | |||
| int sbgemm_align_k; | |||
| int bgemm_align_k; | |||
| int (*bgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, bfloat16 *, bfloat16 *, BLASLONG); | |||
| int (*bgemm_beta )(BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG); | |||
| @@ -1245,12 +1245,12 @@ extern gotoblas_t *gotoblas; | |||
| #define HAVE_EX_L2 gotoblas -> exclusive_cache | |||
| #if (BUILD_BFLOAT16_ONLY==1) | |||
| #define SBGEMM_P gotoblas -> bgemm_p | |||
| #define SBGEMM_Q gotoblas -> bgemm_q | |||
| #define SBGEMM_R gotoblas -> bgemm_r | |||
| #define SBGEMM_UNROLL_M gotoblas -> bgemm_unroll_m | |||
| #define SBGEMM_UNROLL_N gotoblas -> bgemm_unroll_n | |||
| #define SBGEMM_UNROLL_MN gotoblas -> bgemm_unroll_mn | |||
| #define BGEMM_P gotoblas -> bgemm_p | |||
| #define BGEMM_Q gotoblas -> bgemm_q | |||
| #define BGEMM_R gotoblas -> bgemm_r | |||
| #define BGEMM_UNROLL_M gotoblas -> bgemm_unroll_m | |||
| #define BGEMM_UNROLL_N gotoblas -> bgemm_unroll_n | |||
| #define BGEMM_UNROLL_MN gotoblas -> bgemm_unroll_mn | |||
| #endif | |||
| #if (BUILD_BFLOAT16==1) | |||
| @@ -1,5 +1,6 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* Copyright (c) 2025, The OpenBLAS Project */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| @@ -169,6 +170,22 @@ | |||
| #define STOP_RPCC(COUNTER) | |||
| #endif | |||
| #if defined(HALF) /* Select HALF_DTYPE_ALIGN_K, the alignment min_l is rounded up to when computing pad_min_l; defined only when HALF is set. */ | |||
| #if defined(DYNAMIC_ARCH) /* DYNAMIC_ARCH: read the alignment from the gotoblas table at run time. */ | |||
| #if defined(BUILD_BFLOAT16) /* NOTE(review): tests definedness only, so BUILD_BFLOAT16 defined as 0 still takes this branch -- confirm against how the build sets it. */ | |||
| #define HALF_DTYPE_ALIGN_K gotoblas->sbgemm_align_k | |||
| #else /* BUILD_BFLOAT16 not defined: fall back to the bgemm field. */ | |||
| #define HALF_DTYPE_ALIGN_K gotoblas->bgemm_align_k | |||
| #endif /* BUILD_BFLOAT16 */ | |||
| #else /* !DYNAMIC_ARCH: use the SBGEMM_ALIGN_K / BGEMM_ALIGN_K macros directly. */ | |||
| #if defined(BUILD_BFLOAT16) /* Same definedness test as the DYNAMIC_ARCH branch. */ | |||
| #define HALF_DTYPE_ALIGN_K SBGEMM_ALIGN_K | |||
| #else /* BUILD_BFLOAT16 not defined. */ | |||
| #define HALF_DTYPE_ALIGN_K BGEMM_ALIGN_K | |||
| #endif /* BUILD_BFLOAT16 */ | |||
| #endif /* DYNAMIC_ARCH */ | |||
| #endif /* HALF */ | |||
| int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| XFLOAT *sa, XFLOAT *sb, BLASLONG dummy){ | |||
| BLASLONG k, lda, ldb, ldc; | |||
| @@ -305,12 +322,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| } | |||
| BLASLONG pad_min_l = min_l; | |||
| #if defined(HALF) | |||
| #if defined(DYNAMIC_ARCH) | |||
| pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1); | |||
| #else | |||
| pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);; | |||
| #endif | |||
| pad_min_l = (min_l + HALF_DTYPE_ALIGN_K - 1) & ~(HALF_DTYPE_ALIGN_K - 1); | |||
| #endif | |||
| /* First, we have to move data A to L2 cache */ | |||
| @@ -1,6 +1,6 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* Copyright 2023 The OpenBLAS Project. */ | |||
| /* Copyright 2023, 2025 The OpenBLAS Project. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| @@ -216,6 +216,22 @@ typedef struct { | |||
| #define STOP_RPCC(COUNTER) | |||
| #endif | |||
| #if defined(HALF) /* Choose HALF_DTYPE_ALIGN_K for inner_thread's pad_min_l round-up; the macro exists only when HALF is set. */ | |||
| #if defined(DYNAMIC_ARCH) /* Runtime lookup through the gotoblas table. */ | |||
| #if defined(BUILD_BFLOAT16) /* NOTE(review): a definedness test, not a value test -- verify BUILD_BFLOAT16 is never defined as 0 in a bgemm-only build. */ | |||
| #define HALF_DTYPE_ALIGN_K gotoblas->sbgemm_align_k | |||
| #else /* BUILD_BFLOAT16 undefined: take the bgemm field. */ | |||
| #define HALF_DTYPE_ALIGN_K gotoblas->bgemm_align_k | |||
| #endif /* BUILD_BFLOAT16 */ | |||
| #else /* !DYNAMIC_ARCH: expand to the SBGEMM_ALIGN_K / BGEMM_ALIGN_K macros. */ | |||
| #if defined(BUILD_BFLOAT16) /* Mirrors the DYNAMIC_ARCH selection above. */ | |||
| #define HALF_DTYPE_ALIGN_K SBGEMM_ALIGN_K | |||
| #else /* BUILD_BFLOAT16 undefined. */ | |||
| #define HALF_DTYPE_ALIGN_K BGEMM_ALIGN_K | |||
| #endif /* BUILD_BFLOAT16 */ | |||
| #endif /* DYNAMIC_ARCH */ | |||
| #endif /* HALF */ | |||
| static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IFLOAT *sb, BLASLONG mypos){ | |||
| IFLOAT *buffer[DIVIDE_RATE]; | |||
| @@ -325,11 +341,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| BLASLONG pad_min_l = min_l; | |||
| #if defined(HALF) | |||
| #if defined(DYNAMIC_ARCH) | |||
| pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1); | |||
| #else | |||
| pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);; | |||
| #endif | |||
| pad_min_l = (min_l + HALF_DTYPE_ALIGN_K - 1) & ~(HALF_DTYPE_ALIGN_K - 1); | |||
| #endif | |||
| /* Determine step size in m | |||
| @@ -1,3 +1,30 @@ | |||
| ############################################################################### | |||
| # Copyright (c) 2025, The OpenBLAS Project | |||
| # All rights reserved. | |||
| # Redistribution and use in source and binary forms, with or without | |||
| # modification, are permitted provided that the following conditions are | |||
| # met: | |||
| # 1. Redistributions of source code must retain the above copyright | |||
| # notice, this list of conditions and the following disclaimer. | |||
| # 2. Redistributions in binary form must reproduce the above copyright | |||
| # notice, this list of conditions and the following disclaimer in | |||
| # the documentation and/or other materials provided with the | |||
| # distribution. | |||
| # 3. Neither the name of the OpenBLAS project nor the names of | |||
| # its contributors may be used to endorse or promote products | |||
| # derived from this software without specific prior written permission. | |||
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| # ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
| # POSSIBILITY OF SUCH DAMAGE. | |||
| ############################################################################### | |||
| USE_GEMM3M = 0 | |||
| OS := $(shell uname) | |||
| @@ -660,7 +687,7 @@ $(KDIR)$(SBGEMMONCOPYOBJ) : $(KERNELDIR)/$(SBGEMMONCOPY) | |||
| $(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY) | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N)) | |||
| #ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N)) | |||
| $(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY) | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| @@ -668,7 +695,7 @@ $(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY) | |||
| $(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY) | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| endif | |||
| #endif | |||
| endif | |||
| $(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY) | |||