Browse Source

Resolve symbol conflicts when building sbgemm and bgemm together

pull/5287/head
Ye Tao 8 months ago
parent
commit
082a9d28c3
4 changed files with 73 additions and 20 deletions
  1. +7
    -7
      common_param.h
  2. +19
    -5
      driver/level3/level3.c
  3. +18
    -6
      driver/level3/level3_thread.c
  4. +29
    -2
      kernel/Makefile.L3

+ 7
- 7
common_param.h View File

@@ -52,7 +52,7 @@ typedef struct {
#if BUILD_BFLOAT16_ONLY == 1
int bgemm_p, bgemm_q, bgemm_r;
int bgemm_unroll_m, bgemm_unroll_n, bgemm_unroll_mn;
int sbgemm_align_k;
int bgemm_align_k;

int (*bgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, bfloat16 *, bfloat16 *, BLASLONG);
int (*bgemm_beta )(BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
@@ -1245,12 +1245,12 @@ extern gotoblas_t *gotoblas;
#define HAVE_EX_L2 gotoblas -> exclusive_cache

#if (BUILD_BFLOAT16_ONLY==1)
#define SBGEMM_P gotoblas -> bgemm_p
#define SBGEMM_Q gotoblas -> bgemm_q
#define SBGEMM_R gotoblas -> bgemm_r
#define SBGEMM_UNROLL_M gotoblas -> bgemm_unroll_m
#define SBGEMM_UNROLL_N gotoblas -> bgemm_unroll_n
#define SBGEMM_UNROLL_MN gotoblas -> bgemm_unroll_mn
#define BGEMM_P gotoblas -> bgemm_p
#define BGEMM_Q gotoblas -> bgemm_q
#define BGEMM_R gotoblas -> bgemm_r
#define BGEMM_UNROLL_M gotoblas -> bgemm_unroll_m
#define BGEMM_UNROLL_N gotoblas -> bgemm_unroll_n
#define BGEMM_UNROLL_MN gotoblas -> bgemm_unroll_mn
#endif

#if (BUILD_BFLOAT16==1)


+ 19
- 5
driver/level3/level3.c View File

@@ -1,5 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright (c) 2025, The OpenBLAS Project */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
@@ -169,6 +170,22 @@
#define STOP_RPCC(COUNTER)
#endif

/*
 * HALF_DTYPE_ALIGN_K: alignment value used below to pad min_l into
 * pad_min_l for HALF builds. It is only defined when HALF is defined.
 *
 * Selection:
 *   - DYNAMIC_ARCH defined:     read from the gotoblas parameter table.
 *   - DYNAMIC_ARCH not defined: taken from the *_ALIGN_K macro.
 *   - BUILD_BFLOAT16 defined:   the sbgemm value is used, otherwise the
 *                               bgemm value.
 *
 * NOTE(review): the padding expression masks with ~(HALF_DTYPE_ALIGN_K - 1),
 * which presumably requires the value to be a power of two -- confirm.
 * NOTE(review): this tests defined(BUILD_BFLOAT16), whereas common_param.h
 * tests BUILD_BFLOAT16==1; confirm a definition of 0 cannot reach this file.
 */
#if defined(HALF)
#if defined(DYNAMIC_ARCH)
/* Dynamic-arch build: value is fetched through the gotoblas pointer. */
#if defined(BUILD_BFLOAT16)
#define HALF_DTYPE_ALIGN_K gotoblas->sbgemm_align_k
#else
#define HALF_DTYPE_ALIGN_K gotoblas->bgemm_align_k
#endif
#else
/* Non-dynamic build: value comes from the corresponding *_ALIGN_K macro. */
#if defined(BUILD_BFLOAT16)
#define HALF_DTYPE_ALIGN_K SBGEMM_ALIGN_K
#else
#define HALF_DTYPE_ALIGN_K BGEMM_ALIGN_K
#endif
#endif
#endif

int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
XFLOAT *sa, XFLOAT *sb, BLASLONG dummy){
BLASLONG k, lda, ldb, ldc;
@@ -305,12 +322,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
}

BLASLONG pad_min_l = min_l;

#if defined(HALF)
#if defined(DYNAMIC_ARCH)
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
#else
pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);;
#endif
pad_min_l = (min_l + HALF_DTYPE_ALIGN_K - 1) & ~(HALF_DTYPE_ALIGN_K - 1);
#endif

/* First, we have to move data A to L2 cache */


+ 18
- 6
driver/level3/level3_thread.c View File

@@ -1,6 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright 2023 The OpenBLAS Project. */
/* Copyright 2023, 2025 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
@@ -216,6 +216,22 @@ typedef struct {
#define STOP_RPCC(COUNTER)
#endif

/*
 * HALF_DTYPE_ALIGN_K: alignment value used in inner_thread to pad min_l
 * into pad_min_l for HALF builds. It is only defined when HALF is defined.
 *
 * Selection:
 *   - DYNAMIC_ARCH defined:     read from the gotoblas parameter table.
 *   - DYNAMIC_ARCH not defined: taken from the *_ALIGN_K macro.
 *   - BUILD_BFLOAT16 defined:   the sbgemm value is used, otherwise the
 *                               bgemm value.
 *
 * NOTE(review): the padding expression masks with ~(HALF_DTYPE_ALIGN_K - 1),
 * which presumably requires the value to be a power of two -- confirm.
 * NOTE(review): this tests defined(BUILD_BFLOAT16), whereas common_param.h
 * tests BUILD_BFLOAT16==1; confirm a definition of 0 cannot reach this file.
 */
#if defined(HALF)
#if defined(DYNAMIC_ARCH)
/* Dynamic-arch build: value is fetched through the gotoblas pointer. */
#if defined(BUILD_BFLOAT16)
#define HALF_DTYPE_ALIGN_K gotoblas->sbgemm_align_k
#else
#define HALF_DTYPE_ALIGN_K gotoblas->bgemm_align_k
#endif
#else
/* Non-dynamic build: value comes from the corresponding *_ALIGN_K macro. */
#if defined(BUILD_BFLOAT16)
#define HALF_DTYPE_ALIGN_K SBGEMM_ALIGN_K
#else
#define HALF_DTYPE_ALIGN_K BGEMM_ALIGN_K
#endif
#endif
#endif

static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IFLOAT *sb, BLASLONG mypos){

IFLOAT *buffer[DIVIDE_RATE];
@@ -325,11 +341,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
BLASLONG pad_min_l = min_l;

#if defined(HALF)
#if defined(DYNAMIC_ARCH)
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
#else
pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);;
#endif
pad_min_l = (min_l + HALF_DTYPE_ALIGN_K - 1) & ~(HALF_DTYPE_ALIGN_K - 1);
#endif

/* Determine step size in m


+ 29
- 2
kernel/Makefile.L3 View File

@@ -1,3 +1,30 @@
###############################################################################
# Copyright (c) 2025, The OpenBLAS Project
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name of the OpenBLAS project nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###############################################################################
USE_GEMM3M = 0
OS := $(shell uname)

@@ -660,7 +687,7 @@ $(KDIR)$(SBGEMMONCOPYOBJ) : $(KERNELDIR)/$(SBGEMMONCOPY)
$(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY)
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@

ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
#ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))

$(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY)
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
@@ -668,7 +695,7 @@ $(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY)
$(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY)
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@

endif
#endif
endif

$(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY)


Loading…
Cancel
Save