| @@ -52,7 +52,7 @@ typedef struct { | |||||
| #if BUILD_BFLOAT16_ONLY == 1 | #if BUILD_BFLOAT16_ONLY == 1 | ||||
| int bgemm_p, bgemm_q, bgemm_r; | int bgemm_p, bgemm_q, bgemm_r; | ||||
| int bgemm_unroll_m, bgemm_unroll_n, bgemm_unroll_mn; | int bgemm_unroll_m, bgemm_unroll_n, bgemm_unroll_mn; | ||||
| int sbgemm_align_k; | |||||
| int bgemm_align_k; | |||||
| int (*bgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, bfloat16 *, bfloat16 *, BLASLONG); | int (*bgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, bfloat16 *, bfloat16 *, BLASLONG); | ||||
| int (*bgemm_beta )(BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG); | int (*bgemm_beta )(BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG); | ||||
| @@ -1245,12 +1245,12 @@ extern gotoblas_t *gotoblas; | |||||
| #define HAVE_EX_L2 gotoblas -> exclusive_cache | #define HAVE_EX_L2 gotoblas -> exclusive_cache | ||||
| #if (BUILD_BFLOAT16_ONLY==1) | #if (BUILD_BFLOAT16_ONLY==1) | ||||
| #define SBGEMM_P gotoblas -> bgemm_p /* SBGEMM_* names: each expands to a bgemm_* field read through the gotoblas pointer */ | |||||
| #define SBGEMM_Q gotoblas -> bgemm_q | |||||
| #define SBGEMM_R gotoblas -> bgemm_r | |||||
| #define SBGEMM_UNROLL_M gotoblas -> bgemm_unroll_m | |||||
| #define SBGEMM_UNROLL_N gotoblas -> bgemm_unroll_n | |||||
| #define SBGEMM_UNROLL_MN gotoblas -> bgemm_unroll_mn | |||||
| #define BGEMM_P gotoblas -> bgemm_p /* BGEMM_* names: same six bgemm_* fields as the SBGEMM_* rows above; NOTE(review): the two groups look like the before/after sides of a rename -- confirm which side is current */ | |||||
| #define BGEMM_Q gotoblas -> bgemm_q | |||||
| #define BGEMM_R gotoblas -> bgemm_r | |||||
| #define BGEMM_UNROLL_M gotoblas -> bgemm_unroll_m | |||||
| #define BGEMM_UNROLL_N gotoblas -> bgemm_unroll_n | |||||
| #define BGEMM_UNROLL_MN gotoblas -> bgemm_unroll_mn | |||||
| #endif | #endif | ||||
| #if (BUILD_BFLOAT16==1) | #if (BUILD_BFLOAT16==1) | ||||
| @@ -1,5 +1,6 @@ | |||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | /* Copyright 2009, 2010 The University of Texas at Austin. */ | ||||
| /* Copyright (c) 2025, The OpenBLAS Project */ | |||||
| /* All rights reserved. */ | /* All rights reserved. */ | ||||
| /* */ | /* */ | ||||
| /* Redistribution and use in source and binary forms, with or */ | /* Redistribution and use in source and binary forms, with or */ | ||||
| @@ -169,6 +170,22 @@ | |||||
| #define STOP_RPCC(COUNTER) | #define STOP_RPCC(COUNTER) | ||||
| #endif | #endif | ||||
| #if defined(HALF) /* HALF_DTYPE_ALIGN_K: alignment value used to pad min_l into pad_min_l; defined only for HALF builds */ | |||||
| #if defined(DYNAMIC_ARCH) /* DYNAMIC_ARCH: take the value from a field read through the gotoblas pointer */ | |||||
| #if defined(BUILD_BFLOAT16) /* NOTE(review): checks defined-ness only, not BUILD_BFLOAT16==1 -- confirm this is intended */ | |||||
| #define HALF_DTYPE_ALIGN_K gotoblas->sbgemm_align_k | |||||
| #else | |||||
| #define HALF_DTYPE_ALIGN_K gotoblas->bgemm_align_k | |||||
| #endif | |||||
| #else /* !DYNAMIC_ARCH: use the SBGEMM_ALIGN_K / BGEMM_ALIGN_K macros instead of a gotoblas field */ | |||||
| #if defined(BUILD_BFLOAT16) | |||||
| #define HALF_DTYPE_ALIGN_K SBGEMM_ALIGN_K | |||||
| #else | |||||
| #define HALF_DTYPE_ALIGN_K BGEMM_ALIGN_K | |||||
| #endif | |||||
| #endif /* DYNAMIC_ARCH */ | |||||
| #endif /* HALF */ | |||||
| int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | ||||
| XFLOAT *sa, XFLOAT *sb, BLASLONG dummy){ | XFLOAT *sa, XFLOAT *sb, BLASLONG dummy){ | ||||
| BLASLONG k, lda, ldb, ldc; | BLASLONG k, lda, ldb, ldc; | ||||
| @@ -305,12 +322,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
| } | } | ||||
| BLASLONG pad_min_l = min_l; | BLASLONG pad_min_l = min_l; | ||||
| #if defined(HALF) | #if defined(HALF) | ||||
| #if defined(DYNAMIC_ARCH) | |||||
| pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1); | |||||
| #else | |||||
| pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);; | |||||
| #endif | |||||
| pad_min_l = (min_l + HALF_DTYPE_ALIGN_K - 1) & ~(HALF_DTYPE_ALIGN_K - 1); | |||||
| #endif | #endif | ||||
| /* First, we have to move data A to L2 cache */ | /* First, we have to move data A to L2 cache */ | ||||
| @@ -1,6 +1,6 @@ | |||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | /* Copyright 2009, 2010 The University of Texas at Austin. */ | ||||
| /* Copyright 2023 The OpenBLAS Project. */ | |||||
| /* Copyright 2023, 2025 The OpenBLAS Project. */ | |||||
| /* All rights reserved. */ | /* All rights reserved. */ | ||||
| /* */ | /* */ | ||||
| /* Redistribution and use in source and binary forms, with or */ | /* Redistribution and use in source and binary forms, with or */ | ||||
| @@ -216,6 +216,22 @@ typedef struct { | |||||
| #define STOP_RPCC(COUNTER) | #define STOP_RPCC(COUNTER) | ||||
| #endif | #endif | ||||
| #if defined(HALF) /* HALF_DTYPE_ALIGN_K: alignment value inner_thread uses to pad min_l into pad_min_l; defined only for HALF builds */ | |||||
| #if defined(DYNAMIC_ARCH) /* DYNAMIC_ARCH: take the value from a field read through the gotoblas pointer */ | |||||
| #if defined(BUILD_BFLOAT16) /* NOTE(review): checks defined-ness only, not BUILD_BFLOAT16==1 -- confirm this is intended */ | |||||
| #define HALF_DTYPE_ALIGN_K gotoblas->sbgemm_align_k | |||||
| #else | |||||
| #define HALF_DTYPE_ALIGN_K gotoblas->bgemm_align_k | |||||
| #endif | |||||
| #else /* !DYNAMIC_ARCH: use the SBGEMM_ALIGN_K / BGEMM_ALIGN_K macros instead of a gotoblas field */ | |||||
| #if defined(BUILD_BFLOAT16) | |||||
| #define HALF_DTYPE_ALIGN_K SBGEMM_ALIGN_K | |||||
| #else | |||||
| #define HALF_DTYPE_ALIGN_K BGEMM_ALIGN_K | |||||
| #endif | |||||
| #endif /* DYNAMIC_ARCH */ | |||||
| #endif /* HALF */ | |||||
| static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IFLOAT *sb, BLASLONG mypos){ | static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IFLOAT *sb, BLASLONG mypos){ | ||||
| IFLOAT *buffer[DIVIDE_RATE]; | IFLOAT *buffer[DIVIDE_RATE]; | ||||
| @@ -325,11 +341,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
| BLASLONG pad_min_l = min_l; | BLASLONG pad_min_l = min_l; | ||||
| #if defined(HALF) | #if defined(HALF) | ||||
| #if defined(DYNAMIC_ARCH) | |||||
| pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1); | |||||
| #else | |||||
| pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);; | |||||
| #endif | |||||
| pad_min_l = (min_l + HALF_DTYPE_ALIGN_K - 1) & ~(HALF_DTYPE_ALIGN_K - 1); | |||||
| #endif | #endif | ||||
| /* Determine step size in m | /* Determine step size in m | ||||
| @@ -1,3 +1,30 @@ | |||||
| ############################################################################### | |||||
| # Copyright (c) 2025, The OpenBLAS Project | |||||
| # All rights reserved. | |||||
| # Redistribution and use in source and binary forms, with or without | |||||
| # modification, are permitted provided that the following conditions are | |||||
| # met: | |||||
| # 1. Redistributions of source code must retain the above copyright | |||||
| # notice, this list of conditions and the following disclaimer. | |||||
| # 2. Redistributions in binary form must reproduce the above copyright | |||||
| # notice, this list of conditions and the following disclaimer in | |||||
| # the documentation and/or other materials provided with the | |||||
| # distribution. | |||||
| # 3. Neither the name of the OpenBLAS project nor the names of | |||||
| # its contributors may be used to endorse or promote products | |||||
| # derived from this software without specific prior written permission. | |||||
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| # ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||||
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||||
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||||
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||||
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||||
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||||
| # POSSIBILITY OF SUCH DAMAGE. | |||||
| ############################################################################### | |||||
| USE_GEMM3M = 0 | USE_GEMM3M = 0 | ||||
| OS := $(shell uname) | OS := $(shell uname) | ||||
| @@ -660,7 +687,7 @@ $(KDIR)$(SBGEMMONCOPYOBJ) : $(KERNELDIR)/$(SBGEMMONCOPY) | |||||
| $(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY) | $(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY) | ||||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | ||||
| ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N)) | |||||
| #ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N)) | |||||
| $(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY) | $(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY) | ||||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | ||||
| @@ -668,7 +695,7 @@ $(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY) | |||||
| $(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY) | $(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY) | ||||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | ||||
| endif | |||||
| #endif | |||||
| endif | endif | ||||
| $(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY) | $(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY) | ||||