Add infrastructure for BGEMVpull/5386/head
| @@ -81,7 +81,9 @@ test/ZBLAT2.SUMM | |||
| test/ZBLAT3.SUMM | |||
| test/ZBLAT3_3M.SUMM | |||
| test/SHBLAT3.SUMM | |||
| test/SBBLAT2.SUMM | |||
| test/SBBLAT3.SUMM | |||
| test/BBLAT2.SUMM | |||
| test/BBLAT3.SUMM | |||
| test/cblat1 | |||
| test/cblat2 | |||
| @@ -97,7 +99,9 @@ test/sblat3 | |||
| test/sblat3_3m | |||
| test/test_shgemm | |||
| test/test_sbgemm | |||
| test/test_sbgemv | |||
| test/test_bgemm | |||
| test/test_bgemv | |||
| test/zblat1 | |||
| test/zblat2 | |||
| test/zblat3 | |||
| @@ -465,6 +465,7 @@ void cblas_sbdtobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *in, OPEN | |||
| void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, float *out, OPENBLAS_CONST blasint incout); | |||
| /* convert BFLOAT16 array to double array */ | |||
| void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout); | |||
| void cblas_bgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 beta, bfloat16 *y, OPENBLAS_CONST blasint incy); | |||
| /* dot production of BFLOAT16 input arrays, and output as float */ | |||
| float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy); | |||
| void cblas_sbgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy); | |||
| @@ -110,6 +110,7 @@ macro(SetDefaultL1) | |||
| SetFallback(SROTMKERNEL rotm.S) | |||
| SetFallback(DROTMKERNEL rotm.S) | |||
| SetFallback(QROTMKERNEL rotm.S) | |||
| SetFallback(BSCALKERNEL ../generic/scal.c) | |||
| SetFallback(SSCALKERNEL scal.S) | |||
| SetFallback(DSCALKERNEL scal.S) | |||
| SetFallback(CSCALKERNEL zscal.S) | |||
| @@ -169,6 +170,8 @@ if (BUILD_BFLOAT16) | |||
| SetFallback(SHSWAPKERNEL ../arm/swap.c) | |||
| SetFallback(TOBF16KERNEL ../x86_64/tobf16.c) | |||
| SetFallback(BF16TOKERNEL ../x86_64/bf16to.c) | |||
| SetFallback(BGEMVNKERNEL ../generic/gemv_n.c) | |||
| SetFallback(BGEMVTKERNEL ../generic/gemv_t.c) | |||
| SetFallback(SBGEMVNKERNEL ../x86_64/sbgemv_n.c) | |||
| SetFallback(SBGEMVTKERNEL ../x86_64/sbgemv_t.c) | |||
| endif () | |||
| @@ -221,6 +224,8 @@ macro(SetDefaultL2) | |||
| SetFallback(XHEMV_V_KERNEL ../generic/zhemv_k.c) | |||
| SetFallback(XHEMV_M_KERNEL ../generic/zhemv_k.c) | |||
| if (BUILD_BFLOAT16) | |||
| SetFallback(BGEMVNKERNEL ../generic/gemv_n.c) | |||
| SetFallback(BGEMVTKERNEL ../generic/gemv_t.c) | |||
| SetFallback(SBGEMVNKERNEL ../x86_64/sbgemv_n.c) | |||
| SetFallback(SBGEMVTKERNEL ../x86_64/sbgemv_t.c) | |||
| SetFallback(SHGERKERNEL ../generic/ger.c) | |||
| @@ -375,7 +375,7 @@ function(GenerateNamedObjects sources_in) | |||
| if (NOT no_float_type) | |||
| string(SUBSTRING ${float_type} 0 1 float_char) | |||
| string(TOLOWER ${float_char} float_char) | |||
| if (${float_type} STREQUAL "BFLOAT16" AND NOT "${defines_in}" MATCHES "BGEMM") | |||
| if (${float_type} STREQUAL "BFLOAT16" AND NOT "${defines_in}" MATCHES "BGEM") | |||
| set (float_char "sb") | |||
| endif () | |||
| endif () | |||
| @@ -30,6 +30,11 @@ | |||
| #define COMMON_B_H | |||
| #ifndef DYNAMIC_ARCH | |||
| #define BGEMV_N_K bgemv_n | |||
| #define BGEMV_T_K bgemv_t | |||
| #define BSCAL_K bscal_k | |||
| #define BGEMM_ONCOPY bgemm_oncopy | |||
| #define BGEMM_OTCOPY bgemm_otcopy | |||
| @@ -45,6 +50,10 @@ | |||
| #define BGEMM_KERNEL bgemm_kernel | |||
| #else | |||
| #define BGEMV_N_K gotoblas->bgemv_n | |||
| #define BGEMV_T_K gotoblas->bgemv_t | |||
| #define BSCAL_K gotoblas->bscal_k | |||
| #define BGEMM_ONCOPY gotoblas->bgemm_oncopy | |||
| #define BGEMM_OTCOPY gotoblas->bgemm_otcopy | |||
| @@ -60,6 +60,7 @@ double BLASFUNC(dsdot) (blasint *, float *, blasint *, float *, blasint *); | |||
| double BLASFUNC(ddot) (blasint *, double *, blasint *, double *, blasint *); | |||
| xdouble BLASFUNC(qdot) (blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||
| void BLASFUNC(bscal) (blasint *, bfloat16 *, bfloat16 *, blasint *); | |||
| float BLASFUNC(sbdot) (blasint *, bfloat16 *, blasint *, bfloat16 *, blasint *); | |||
| void BLASFUNC(sbstobf16) (blasint *, float *, blasint *, bfloat16 *, blasint *); | |||
| void BLASFUNC(sbdtobf16) (blasint *, double *, blasint *, bfloat16 *, blasint *); | |||
| @@ -256,6 +257,8 @@ void BLASFUNC(xgeru)(blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||
| void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||
| xdouble *, blasint *, xdouble *, blasint *); | |||
| void BLASFUNC(bgemv)(char *, blasint *, blasint *, bfloat16 *, bfloat16 *, blasint *, | |||
| bfloat16 *, blasint *, bfloat16 *, bfloat16 *, blasint *); | |||
| void BLASFUNC(sbgemv)(char *, blasint *, blasint *, float *, bfloat16 *, blasint *, | |||
| bfloat16 *, blasint *, float *, float *, blasint *); | |||
| void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *, | |||
| @@ -1,4 +1,5 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2025 The OpenBLAS Project. */ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| @@ -169,6 +170,9 @@ BLASLONG icmin_k(BLASLONG, float *, BLASLONG); | |||
| BLASLONG izmin_k(BLASLONG, double *, BLASLONG); | |||
| BLASLONG ixmin_k(BLASLONG, xdouble *, BLASLONG); | |||
| int bscal_k(BLASLONG, BLASLONG, BLASLONG, bfloat16, | |||
| bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG); | |||
| int sscal_k(BLASLONG, BLASLONG, BLASLONG, float, | |||
| float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||
| int dscal_k(BLASLONG, BLASLONG, BLASLONG, double, | |||
| @@ -1,4 +1,5 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2025 The OpenBLAS Project */ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| @@ -44,6 +45,11 @@ | |||
| extern "C" { | |||
| #endif | |||
| int bgemv_n(BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG); | |||
| int bgemv_t(BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG); | |||
| int bgemv_thread_n(BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG, int); | |||
| int bgemv_thread_t(BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG, int); | |||
| int sbgemv_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG); | |||
| int sbgemv_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG); | |||
| int sbgemv_thread_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int); | |||
| @@ -705,6 +705,11 @@ | |||
| #elif defined(BFLOAT16) && defined(BGEMM) | |||
| #define SCAL_K BSCAL_K | |||
| #define GEMV_N BGEMV_N_K | |||
| #define GEMV_T BGEMV_T_K | |||
| #define GEMM_BETA BGEMM_BETA | |||
| #define GEMM_KERNEL_N BGEMM_KERNEL | |||
| #define GEMM_KERNEL_L BGEMM_KERNEL | |||
| @@ -754,8 +759,8 @@ | |||
| #define D_BF16_TO_K DBF16TOD_K | |||
| #define S_TO_BF16_K SBSTOBF16_K | |||
| #define S_BF16_TO_K SBF16TOS_K | |||
| #define SBGEMV_N SBGEMV_N_K | |||
| #define SBGEMV_T SBGEMV_T_K | |||
| #define GEMV_N SBGEMV_N_K | |||
| #define GEMV_T SBGEMV_T_K | |||
| #define AMAX_K SAMAX_K | |||
| #define AMIN_K SAMIN_K | |||
| @@ -773,8 +778,6 @@ | |||
| #define AXPYC_K SAXPYC_K | |||
| #define AXPBY_K SAXPBY_K | |||
| #define SCAL_K SSCAL_K | |||
| #define GEMV_N SGEMV_N | |||
| #define GEMV_T SGEMV_T | |||
| #define SYMV_U SSYMV_U | |||
| #define SYMV_L SSYMV_L | |||
| #define GERU_K SGERU_K | |||
| @@ -98,10 +98,14 @@ int (*shgemm_otcopy )(BLASLONG, BLASLONG, hfloat16 *, BLASLONG, hfloat16 *); | |||
| int (*sbrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float); | |||
| int (*sbrotm_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||
| int (*bscal_k) (BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG); | |||
| int (*sbaxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||
| int (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||
| int (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||
| int (*bgemv_n) (BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG); | |||
| int (*bgemv_t) (BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG); | |||
| int (*sbgemv_n) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG); | |||
| int (*sbgemv_t) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG); | |||
| int (*sbger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||
| @@ -1,3 +1,31 @@ | |||
| ############################################################################### | |||
| # Copyright (c) 2025 The OpenBLAS Project | |||
| # All rights reserved. | |||
| # Redistribution and use in source and binary forms, with or without | |||
| # modification, are permitted provided that the following conditions are | |||
| # met: | |||
| # 1. Redistributions of source code must retain the above copyright | |||
| # notice, this list of conditions and the following disclaimer. | |||
| # 2. Redistributions in binary form must reproduce the above copyright | |||
| # notice, this list of conditions and the following disclaimer in | |||
| # the documentation and/or other materials provided with the | |||
| # distribution. | |||
| # 3. Neither the name of the OpenBLAS project nor the names of | |||
| # its contributors may be used to endorse or promote products | |||
| # derived from this software without specific prior written permission. | |||
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| # ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
| # POSSIBILITY OF SUCH DAMAGE. | |||
| ############################################################################### | |||
| TOPDIR = ../.. | |||
| include ../../Makefile.system | |||
| @@ -423,6 +451,9 @@ XBLASOBJS += \ | |||
| xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLN.$(SUFFIX) | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| BBLASOBJS += \ | |||
| bgemv_thread_n$(TSUFFIX).$(SUFFIX) \ | |||
| bgemv_thread_t$(TSUFFIX).$(SUFFIX) | |||
| SBBLASOBJS += \ | |||
| sbgemv_thread_n$(TSUFFIX).$(SUFFIX) \ | |||
| sbgemv_thread_t$(TSUFFIX).$(SUFFIX) | |||
| @@ -3707,6 +3738,10 @@ xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h | |||
| $(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F) | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| bgemv_thread_n.$(SUFFIX) bgemv_thread_n.$(PSUFFIX) : sbgemv_thread.c ../../common.h | |||
| $(CC) -c $(CFLAGS) -DBGEMM -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
| bgemv_thread_t.$(SUFFIX) bgemv_thread_t.$(PSUFFIX) : sbgemv_thread.c ../../common.h | |||
| $(CC) -c $(CFLAGS) -DBGEMM -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
| sbgemv_thread_n.$(SUFFIX) sbgemv_thread_n.$(PSUFFIX) : sbgemv_thread.c ../../common.h | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
| sbgemv_thread_t.$(SUFFIX) sbgemv_thread_t.$(PSUFFIX) : sbgemv_thread.c ../../common.h | |||
| @@ -1,4 +1,5 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2025 The OpenBLAS Project. */ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| @@ -41,21 +42,21 @@ | |||
| #include "common.h" | |||
| #ifndef TRANSA | |||
| #define SBGEMV SBGEMV_N | |||
| #define GEMV GEMV_N | |||
| #else | |||
| #define SBGEMV SBGEMV_T | |||
| #define GEMV GEMV_T | |||
| #endif | |||
| static int sbgemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *dummy2, BLASLONG dummy3){ | |||
| bfloat16 *a, *x; | |||
| float *y; | |||
| IFLOAT *a, *x; | |||
| FLOAT *y; | |||
| BLASLONG lda, incx, incy; | |||
| BLASLONG m_from, m_to, n_from, n_to; | |||
| a = (bfloat16 *)args->a; | |||
| x = (bfloat16 *)args->b; | |||
| y = (float *)args->c; | |||
| a = (IFLOAT *)args->a; | |||
| x = (IFLOAT *)args->b; | |||
| y = (FLOAT *)args->c; | |||
| lda = args->lda; | |||
| incx = args->ldb; | |||
| @@ -77,12 +78,12 @@ static int sbgemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||
| y += n_from * incy; | |||
| #endif | |||
| SBGEMV(m_to - m_from, n_to - n_from, *((FLOAT *)(args->alpha)), a, lda, x, incx, *((FLOAT *)(args->beta)), y, incy); | |||
| GEMV(m_to - m_from, n_to - n_from, *((FLOAT *)(args->alpha)), a, lda, x, incx, *((FLOAT *)(args->beta)), y, incy); | |||
| return 0; | |||
| } | |||
| int CNAME(BLASLONG m, BLASLONG n, float alpha, bfloat16 *a, BLASLONG lda, bfloat16 *x, BLASLONG incx, float beta, float *y, BLASLONG incy, int threads) | |||
| int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG incx, FLOAT beta, FLOAT *y, BLASLONG incy, int threads) | |||
| { | |||
| blas_arg_t args; | |||
| blas_queue_t queue[MAX_CPU_NUMBER]; | |||
| @@ -1,5 +1,33 @@ | |||
| #!/bin/sh | |||
| ############################################################################### | |||
| # Copyright (c) 2025, The OpenBLAS Project | |||
| # All rights reserved. | |||
| # Redistribution and use in source and binary forms, with or without | |||
| # modification, are permitted provided that the following conditions are | |||
| # met: | |||
| # 1. Redistributions of source code must retain the above copyright | |||
| # notice, this list of conditions and the following disclaimer. | |||
| # 2. Redistributions in binary form must reproduce the above copyright | |||
| # notice, this list of conditions and the following disclaimer in | |||
| # the documentation and/or other materials provided with the | |||
| # distribution. | |||
| # 3. Neither the name of the OpenBLAS project nor the names of | |||
| # its contributors may be used to endorse or promote products | |||
| # derived from this software without specific prior written permission. | |||
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| # ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
| # POSSIBILITY OF SUCH DAMAGE. | |||
| ############################################################################### | |||
| # Changelog | |||
| # 2017/09/03 staticfloat | |||
| # Added zsymv and csymv into @lapackobjs2 so they are properly renamed | |||
| @@ -51,7 +79,7 @@ blasobjsz=" | |||
| zgeadd dzsum zgemmt zgemmtr" | |||
| blasobjs="lsame xerbla" | |||
| bfblasobjs="bgemm sbgemm sbgemmt sbgemmtr sbgemv sbdot sbstobf16 sbdtobf16 sbf16tos dbf16tod" | |||
| bfblasobjs="bgemm bgemv sbgemm sbgemmt sbgemmtr sbgemv sbdot sbstobf16 sbdtobf16 sbf16tos dbf16tod" | |||
| hfblasobjs="shgemm" | |||
| cblasobjsc=" | |||
| cblas_caxpy cblas_ccopy cblas_cdotc cblas_cdotu cblas_cgbmv cblas_cgemm cblas_cgemv | |||
| @@ -1,5 +1,33 @@ | |||
| #!/usr/bin/env perl | |||
| ############################################################################### | |||
| # Copyright (c) 2025, The OpenBLAS Project | |||
| # All rights reserved. | |||
| # Redistribution and use in source and binary forms, with or without | |||
| # modification, are permitted provided that the following conditions are | |||
| # met: | |||
| # 1. Redistributions of source code must retain the above copyright | |||
| # notice, this list of conditions and the following disclaimer. | |||
| # 2. Redistributions in binary form must reproduce the above copyright | |||
| # notice, this list of conditions and the following disclaimer in | |||
| # the documentation and/or other materials provided with the | |||
| # distribution. | |||
| # 3. Neither the name of the OpenBLAS project nor the names of | |||
| # its contributors may be used to endorse or promote products | |||
| # derived from this software without specific prior written permission. | |||
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| # ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
| # POSSIBILITY OF SUCH DAMAGE. | |||
| ############################################################################### | |||
| # Changelog | |||
| # 2017/09/03 staticfloat | |||
| # Added zsymv and csymv into @lapackobjs2 so they are properly renamed | |||
| @@ -51,7 +79,7 @@ | |||
| zgeadd, dzsum, zgemmt,zgemmtr); | |||
| @blasobjs = (lsame, xerbla); | |||
| @bfblasobjs = (bgemm, sbgemm, sbgemmt, sbgemmtr, sbgemv, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod); | |||
| @bfblasobjs = (bgemm, bgemv, sbgemm, sbgemmt, sbgemmtr, sbgemv, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod); | |||
| @hfblasobjs = (shgemm); | |||
| @cblasobjsc = ( | |||
| cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv, | |||
| @@ -150,11 +150,13 @@ endif () | |||
| GenerateNamedObjects("imax.c" "USE_MIN" "i*min" ${CBLAS_FLAG}) | |||
| if (BUILD_BFLOAT16) | |||
| GenerateNamedObjects("scal.c" "BGEMM" "bscal" ${CBLAS_FLAG} "" "" true "BFLOAT16") | |||
| GenerateNamedObjects("bf16dot.c" "" "sbdot" ${CBLAS_FLAG} "" "" true "BFLOAT16") | |||
| GenerateNamedObjects("gemm.c" "BGEMM" "bgemm" ${CBLAS_FLAG} "" "" true "BFLOAT16") | |||
| GenerateNamedObjects("gemm.c" "" "sbgemm" ${CBLAS_FLAG} "" "" true "BFLOAT16") | |||
| GenerateNamedObjects("sbgemmt.c" "" "sbgemmt" ${CBLAS_FLAG} "" "" true "BFLOAT16") | |||
| GenerateNamedObjects("sbgemmt.c" "RNAME" "sbgemmtr" ${CBLAS_FLAG} "" "" true "BFLOAT16") | |||
| GenerateNamedObjects("sbgemv.c" "BGEMM" "bgemv" ${CBLAS_FLAG} "" "" true "BFLOAT16") | |||
| GenerateNamedObjects("sbgemv.c" "" "sbgemv" ${CBLAS_FLAG} "" "" true "BFLOAT16") | |||
| GenerateNamedObjects("tobf16.c" "SINGLE_PREC" "sbstobf16" ${CBLAS_FLAG} "" "" true "BFLOAT16") | |||
| GenerateNamedObjects("tobf16.c" "DOUBLE_PREC" "sbdtobf16" ${CBLAS_FLAG} "" "" true "BFLOAT16") | |||
| @@ -75,7 +75,9 @@ SBLAS3OBJS = \ | |||
| sgeadd.$(SUFFIX) sgemmt.$(SUFFIX) sgemmtr.$(SUFFIX) | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| BBLAS3OBJ = bgemm.$(SUFFIX) | |||
| BBLAS3OBJS = bgemm.$(SUFFIX) | |||
| BBLAS2OBJS = bgemv.$(SUFFIX) | |||
| BBLAS1OBJS = bscal.$(SUFFIX) | |||
| SBBLAS1OBJS = sbdot.$(SUFFIX) | |||
| SBBLAS2OBJS = sbgemv.$(SUFFIX) | |||
| SBBLAS3OBJS = sbgemm.$(SUFFIX) sbgemmt.$(SUFFIX) sbgemmtr.$(SUFFIX) | |||
| @@ -319,6 +321,8 @@ CSBLAS3OBJS = \ | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| CBBLAS3OBJS = cblas_bgemm.$(SUFFIX) | |||
| CBBLAS2OBJS = cblas_bgemv.$(SUFFIX) | |||
| CBBLAS1OBJS = cblas_bscal.$(SUFFIX) | |||
| CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX) | |||
| CSBBLAS2OBJS = cblas_sbgemv.$(SUFFIX) | |||
| CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX) cblas_sbgemmt.$(SUFFIX) cblas_sbgemmtr.$(SUFFIX) cblas_sbgemm_batch.$(SUFFIX) | |||
| @@ -423,7 +427,9 @@ override CFLAGS += -I. | |||
| SBLAS1OBJS += $(CSBLAS1OBJS) | |||
| SBLAS2OBJS += $(CSBLAS2OBJS) | |||
| SBLAS3OBJS += $(CSBLAS3OBJS) | |||
| BBLAS3OBJ += $(CBBLAS3OBJS) | |||
| BBLAS3OBJS += $(CBBLAS3OBJS) | |||
| BBLAS2OBJS += $(CBBLAS2OBJS) | |||
| BBLAS1OBJS += $(CBBLAS1OBJS) | |||
| SBBLAS1OBJS += $(CSBBLAS1OBJS) | |||
| SBBLAS2OBJS += $(CSBBLAS2OBJS) | |||
| SBBLAS3OBJS += $(CSBBLAS3OBJS) | |||
| @@ -443,7 +449,7 @@ SBEXTOBJS += $(CSBEXTOBJS) | |||
| CBAUXOBJS += $(CXERBLAOBJ) | |||
| endif | |||
| BBLASOBJS = $(BBLAS3OBJ) | |||
| BBLASOBJS = $(BBLAS3OBJS) $(BBLAS2OBJS) $(BBLAS1OBJS) | |||
| SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) | |||
| SBBLASOBJS = $(SBBLAS1OBJS) $(SBBLAS2OBJS) $(SBBLAS3OBJS) | |||
| SHBLASOBJS = $(SHBLAS3OBJS) | |||
| @@ -589,7 +595,7 @@ clean :: | |||
| level1 : $(SBEXTOBJS) $(SBBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) | |||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
| level2 : $(SBBLAS2OBJS) $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) | |||
| level2 : $(SBBLAS2OBJS) $(BBLAS2OBJS) $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) | |||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
| level3 : $(SBBLAS3OBJS) $(BBLAS3OBJ) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) $(SHBLAS3OBJS) | |||
| @@ -824,6 +830,8 @@ dsdot.$(SUFFIX) dsdot.$(PSUFFIX) : dsdot.c | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| bscal.$(SUFFIX) bscal.$(PSUFFIX) : scal.c | |||
| $(CC) $(CFLAGS) -DBGEMM -c $< -o $(@F) | |||
| sbdot.$(SUFFIX) sbdot.$(PSUFFIX) : bf16dot.c | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| sbstobf16.$(SUFFIX) sbstobf16.$(PSUFFIX) : tobf16.c | |||
| @@ -981,6 +989,8 @@ xgerc.$(SUFFIX) xgerc.$(PSUFFIX) : zger.c | |||
| $(CC) -c $(CFLAGS) -DCONJ $< -o $(@F) | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| bgemv.$(SUFFIX) bgemv.$(PSUFFIX) : sbgemv.c | |||
| $(CC) $(CFLAGS) -DBGEMM -c $< -o $(@F) | |||
| sbgemv.$(SUFFIX) sbgemv.$(PSUFFIX) : sbgemv.c | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| endif | |||
| @@ -1653,6 +1663,8 @@ cblas_dsdot.$(SUFFIX) cblas_dsdot.$(PSUFFIX) : dsdot.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| cblas_bscal.$(SUFFIX) cblas_bscal.$(PSUFFIX) : scal.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||
| cblas_sbdot.$(SUFFIX) cblas_sbdot.$(PSUFFIX) : bf16dot.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||
| cblas_sbstobf16.$(SUFFIX) cblas_sbstobf16.$(PSUFFIX) : tobf16.c | |||
| @@ -1807,6 +1819,8 @@ cblas_zdrot.$(SUFFIX) cblas_zdrot.$(PSUFFIX) : zrot.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| cblas_bgemv.$(SUFFIX) cblas_bgemv.$(PSUFFIX) : sbgemv.c | |||
| $(CC) -DCBLAS -DBGEMM -c $(CFLAGS) $< -o $(@F) | |||
| cblas_sbgemv.$(SUFFIX) cblas_sbgemv.$(PSUFFIX) : sbgemv.c | |||
| $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) | |||
| endif | |||
| @@ -1,5 +1,5 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2024, The OpenBLAS Project. */ | |||
| /* Copyright 2024-2025 The OpenBLAS Project. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| @@ -305,7 +305,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
| #endif | |||
| int (*gemv[]) (BLASLONG, BLASLONG, FLOAT, IFLOAT *, BLASLONG, | |||
| IFLOAT *, BLASLONG, FLOAT, FLOAT *, BLASLONG) = { | |||
| SBGEMV_N, SBGEMV_T,}; | |||
| GEMV_N, GEMV_T,}; | |||
| if (m == 0) | |||
| @@ -1,4 +1,5 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2025 The OpenBLAS Project. */ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| @@ -43,17 +44,25 @@ | |||
| #include "functable.h" | |||
| #endif | |||
| #ifdef BGEMM | |||
| #define GEMV_THREAD_N bgemv_thread_n | |||
| #define GEMV_THREAD_T bgemv_thread_t | |||
| #define ERROR_NAME "BGEMV " | |||
| #else | |||
| #define GEMV_THREAD_N sbgemv_thread_n | |||
| #define GEMV_THREAD_T sbgemv_thread_t | |||
| #define ERROR_NAME "SBGEMV " | |||
| #endif | |||
| #ifdef SMP | |||
| static int (*sbgemv_thread[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG, int) = { | |||
| sbgemv_thread_n, sbgemv_thread_t, | |||
| static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, IFLOAT *, BLASLONG, IFLOAT * , BLASLONG, FLOAT, FLOAT *, BLASLONG, int) = { | |||
| GEMV_THREAD_N, GEMV_THREAD_T, | |||
| }; | |||
| #endif | |||
| #ifndef CBLAS | |||
| void NAME(char *TRANS, blasint *M, blasint *N, float *ALPHA, bfloat16 *a, blasint *LDA, bfloat16 *x, blasint *INCX, float *BETA, float *y, blasint *INCY) | |||
| void NAME(char *TRANS, blasint *M, blasint *N, FLOAT *ALPHA, IFLOAT *a, blasint *LDA, IFLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY) | |||
| { | |||
| char trans = *TRANS; | |||
| blasint m = *M; | |||
| @@ -61,14 +70,14 @@ void NAME(char *TRANS, blasint *M, blasint *N, float *ALPHA, bfloat16 *a, blasin | |||
| blasint lda = *LDA; | |||
| blasint incx = *INCX; | |||
| blasint incy = *INCY; | |||
| float alpha = *ALPHA; | |||
| float beta = *BETA; | |||
| FLOAT alpha = *ALPHA; | |||
| FLOAT beta = *BETA; | |||
| #ifdef SMP | |||
| int nthreads; | |||
| #endif | |||
| int (*sbgemv[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG) = { | |||
| SBGEMV_N, SBGEMV_T, | |||
| int (*gemv[])(BLASLONG, BLASLONG, FLOAT, IFLOAT *, BLASLONG, IFLOAT * , BLASLONG, FLOAT, FLOAT *, BLASLONG) = { | |||
| GEMV_N, GEMV_T, | |||
| }; | |||
| blasint info; | |||
| @@ -104,7 +113,7 @@ void NAME(char *TRANS, blasint *M, blasint *N, float *ALPHA, bfloat16 *a, blasin | |||
| #else | |||
| void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasint n, float alpha, bfloat16 *a, blasint lda, bfloat16 *x, blasint incx, float beta, float *y, blasint incy) | |||
| void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasint n, FLOAT alpha, IFLOAT *a, blasint lda, IFLOAT *x, blasint incx, FLOAT beta, FLOAT *y, blasint incy) | |||
| { | |||
| blasint lenx, leny; | |||
| int trans; | |||
| @@ -113,8 +122,8 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasi | |||
| int nthreads; | |||
| #endif | |||
| int (*sbgemv[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG) = { | |||
| SBGEMV_N, SBGEMV_T, | |||
| int (*gemv[])(BLASLONG, BLASLONG, FLOAT, IFLOAT *, BLASLONG, IFLOAT * , BLASLONG, FLOAT, FLOAT *, BLASLONG) = { | |||
| GEMV_N, GEMV_T, | |||
| }; | |||
| PRINT_DEBUG_CNAME; | |||
| @@ -166,8 +175,17 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasi | |||
| leny = m; | |||
| } | |||
| if (alpha == ZERO) { | |||
| if (beta != ONE) SCAL_K(leny, 0, 0, beta, y, blasabs(incy), NULL, 0, NULL, 0); | |||
| #ifdef BGEMM | |||
| float alpha_float, beta_float; | |||
| SBF16TOS_K(1, &alpha, 1, &alpha_float, 1); | |||
| SBF16TOS_K(1, &beta, 1, &beta_float, 1); | |||
| #else | |||
| float alpha_float = alpha; | |||
| float beta_float = beta; | |||
| #endif | |||
| if (alpha_float == ZERO) { | |||
| if (beta_float != ONE) SCAL_K(leny, 0, 0, beta, y, blasabs(incy), NULL, 0, NULL, 0); | |||
| return; | |||
| } | |||
| @@ -185,10 +203,10 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasi | |||
| if (nthreads == 1) { | |||
| #endif | |||
| (sbgemv[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy); | |||
| (gemv[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy); | |||
| #ifdef SMP | |||
| } else { | |||
| (sbgemv_thread[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy, nthreads); | |||
| (gemv_thread[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy, nthreads); | |||
| } | |||
| #endif | |||
| @@ -1,4 +1,5 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2025 The OpenBLAS Project. */ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| @@ -68,7 +69,14 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){ | |||
| if (incx <= 0 || n <= 0) return; | |||
| if (alpha == ONE) return; | |||
| #ifdef BGEMM | |||
| float alpha_float; | |||
| SBF16TOS_K(1, &alpha, 1, &alpha_float, 1); | |||
| #else | |||
| float alpha_float = alpha; | |||
| #endif | |||
| if (alpha_float == ONE) return; | |||
| IDEBUG_START; | |||
| @@ -121,6 +121,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
| # sbdot | |||
| if (BUILD_BFLOAT16) | |||
| GenerateNamedObjects("${KERNELDIR}/${BSCALKERNEL}" "BGEMM" "scal_k" false "" "" false "BFLOAT16") | |||
| GenerateNamedObjects("${KERNELDIR}/${SBDOTKERNEL}" "SBDOT" "dot_k" false "" "" false "BFLOAT16") | |||
| GenerateNamedObjects("${KERNELDIR}/${BF16TOKERNEL}" "SINGLE" "f16tos_k" false "" "" false "BFLOAT16") | |||
| GenerateNamedObjects("${KERNELDIR}/${BF16TOKERNEL}" "DOUBLE" "bf16tod_k" false "" "" false "DOUBLE") | |||
| @@ -222,6 +223,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") | |||
| endif () | |||
| if (BUILD_BFLOAT16) | |||
| GenerateNamedObjects("${KERNELDIR}/${BGEMVNKERNEL}" "BGEMM" "gemv_n" false "" "" false "BFLOAT16") | |||
| GenerateNamedObjects("${KERNELDIR}/${BGEMVTKERNEL}" "BGEMM" "gemv_t" false "" "" false "BFLOAT16") | |||
| GenerateNamedObjects("${KERNELDIR}/${SBGEMVNKERNEL}" "" "gemv_n" false "" "" false "BFLOAT16") | |||
| GenerateNamedObjects("${KERNELDIR}/${SBGEMVTKERNEL}" "" "gemv_t" false "" "" false "BFLOAT16") | |||
| endif () | |||
| @@ -1,3 +1,31 @@ | |||
| ############################################################################### | |||
| # Copyright (c) 2025 The OpenBLAS Project | |||
| # All rights reserved. | |||
| # Redistribution and use in source and binary forms, with or without | |||
| # modification, are permitted provided that the following conditions are | |||
| # met: | |||
| # 1. Redistributions of source code must retain the above copyright | |||
| # notice, this list of conditions and the following disclaimer. | |||
| # 2. Redistributions in binary form must reproduce the above copyright | |||
| # notice, this list of conditions and the following disclaimer in | |||
| # the documentation and/or other materials provided with the | |||
| # distribution. | |||
| # 3. Neither the name of the OpenBLAS project nor the names of | |||
| # its contributors may be used to endorse or promote products | |||
| # derived from this software without specific prior written permission. | |||
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| # ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
| # POSSIBILITY OF SUCH DAMAGE. | |||
| ############################################################################### | |||
| FMAFLAG= | |||
| ifndef OLDGCC | |||
| ifdef HAVE_FMA3 | |||
| @@ -271,6 +299,10 @@ XDOTKERNEL = zdot.S | |||
| endif | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| ifndef BSCALKERNEL | |||
| BSCALKERNEL = ../generic/scal.c | |||
| endif | |||
| ifndef SBDOTKERNEL | |||
| SBDOTKERNEL = ../x86_64/sbdot.c | |||
| endif | |||
| @@ -551,6 +583,8 @@ XBLASOBJS += \ | |||
| xscal_k$(TSUFFIX).$(SUFFIX) xswap_k$(TSUFFIX).$(SUFFIX) xsum_k$(TSUFFIX).$(SUFFIX) | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| BBLASOBJS += \ | |||
| bscal_k$(TSUFFIX).$(SUFFIX) | |||
| SBBLASOBJS += \ | |||
| sbdot_k$(TSUFFIX).$(SUFFIX) | |||
| SBEXTOBJS += \ | |||
| @@ -778,6 +812,8 @@ $(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@ | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| $(KDIR)bscal_k$(TSUFFIX).$(SUFFIX) $(KDIR)bscal_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(BSCALKERNEL) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ | |||
| $(KDIR)sbdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sbdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBDOTKERNEL) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@ | |||
| $(KDIR)sbstobf16_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TOBF16KERNEL) | |||
| @@ -1,3 +1,31 @@ | |||
| ############################################################################### | |||
| # Copyright (c) 2025 The OpenBLAS Project | |||
| # All rights reserved. | |||
| # Redistribution and use in source and binary forms, with or without | |||
| # modification, are permitted provided that the following conditions are | |||
| # met: | |||
| # 1. Redistributions of source code must retain the above copyright | |||
| # notice, this list of conditions and the following disclaimer. | |||
| # 2. Redistributions in binary form must reproduce the above copyright | |||
| # notice, this list of conditions and the following disclaimer in | |||
| # the documentation and/or other materials provided with the | |||
| # distribution. | |||
| # 3. Neither the name of the OpenBLAS project nor the names of | |||
| # its contributors may be used to endorse or promote products | |||
| # derived from this software without specific prior written permission. | |||
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| # ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
| # POSSIBILITY OF SUCH DAMAGE. | |||
| ############################################################################### | |||
| FMAFLAG= | |||
| ifndef OLDGCC | |||
| ifdef HAVE_FMA3 | |||
| @@ -56,6 +84,14 @@ XGEMVTKERNEL = zgemv_t.S | |||
| endif | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| ifndef BGEMVNKERNEL | |||
| BGEMVNKERNEL = ../generic/gemv_n.c | |||
| endif | |||
| ifndef BGEMVTKERNEL | |||
| BGEMVTKERNEL = ../generic/gemv_t.c | |||
| endif | |||
| ifndef SBGEMVNKERNEL | |||
| SBGEMVNKERNEL = ../x86_64/sbgemv_n.c | |||
| endif | |||
| @@ -255,6 +291,9 @@ XBLASOBJS += \ | |||
| xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX) | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| BBLASOBJS += \ | |||
| bgemv_n$(TSUFFIX).$(SUFFIX) \ | |||
| bgemv_t$(TSUFFIX).$(SUFFIX) | |||
| SBBLASOBJS += \ | |||
| sbgemv_n$(TSUFFIX).$(SUFFIX) \ | |||
| sbgemv_t$(TSUFFIX).$(SUFFIX) | |||
| @@ -513,5 +552,9 @@ $(KDIR)sbgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sbgemv_n$(TPSUFFIX).$(PSUFFIX) : $(KE | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@ | |||
| $(KDIR)sbgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)sbgemv_t$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMVTKERNEL) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@ | |||
| $(KDIR)bgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)bgemv_n$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(BGEMVNKERNEL) | |||
| $(CC) -c $(CFLAGS) -DBGEMM -UCOMPLEX $< -o $@ | |||
| $(KDIR)bgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)bgemv_t$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(BGEMVTKERNEL) | |||
| $(CC) -c $(CFLAGS) -DBGEMM -UCOMPLEX $< -o $@ | |||
| endif | |||
| @@ -0,0 +1,64 @@ | |||
| /*************************************************************************** | |||
| * Copyright (c) 2025, The OpenBLAS Project | |||
| * All rights reserved. | |||
| * Redistribution and use in source and binary forms, with or without | |||
| * modification, are permitted provided that the following conditions are | |||
| * met: | |||
| * 1. Redistributions of source code must retain the above copyright | |||
| * notice, this list of conditions and the following disclaimer. | |||
| * 2. Redistributions in binary form must reproduce the above copyright | |||
| * notice, this list of conditions and the following disclaimer in | |||
| * the documentation and/or other materials provided with the | |||
| * distribution. | |||
| * 3. Neither the name of the OpenBLAS project nor the names of | |||
| * its contributors may be used to endorse or promote products | |||
| * derived from this software without specific prior written permission. | |||
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| * ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
| * POSSIBILITY OF SUCH DAMAGE. | |||
| * *****************************************************************************/ | |||
| #if defined(BFLOAT16) && defined(BFLOAT16CONVERSION) | |||
| static float | |||
| bfloat16tof32 (bfloat16 value) | |||
| { | |||
| blasint one = 1; | |||
| float result; | |||
| sbf16tos_(&one, &value, &one, &result, &one); | |||
| return result; | |||
| } | |||
| #ifdef BGEMM | |||
| static bfloat16 f32tobfloat16(float value) { | |||
| blasint one = 1; | |||
| bfloat16 result; | |||
| sbstobf16_(&one, &value, &one, &result, &one); | |||
| return result; | |||
| } | |||
| #endif | |||
| #ifdef BGEMM | |||
| #define ALPHA bfloat16tof32(alpha) | |||
| #define BETA bfloat16tof32(beta) | |||
| #define BF16TOF32(x) (bfloat16tof32(x)) | |||
| #define F32TOBF16(x) (f32tobfloat16(x)) | |||
| #else | |||
| #define ALPHA alpha | |||
| #define BETA beta | |||
| #define BF16TOF32(x) (bfloat16tof32(x)) | |||
| #define F32TOBF16(x) x | |||
| #endif | |||
| #else | |||
| #define ALPHA alpha | |||
| #define BETA beta | |||
| #define BF16TOF32(x) x | |||
| #define F32TOBF16(x) x | |||
| #endif | |||
| @@ -27,39 +27,8 @@ | |||
| * *****************************************************************************/ | |||
| #include "common.h" | |||
| #if defined(BFLOAT16) && defined(BFLOAT16CONVERSION) | |||
| static float | |||
| bfloat16tof32 (bfloat16 value) | |||
| { | |||
| blasint one = 1; | |||
| float result; | |||
| sbf16tos_(&one, &value, &one, &result, &one); | |||
| return result; | |||
| } | |||
| #ifdef BGEMM | |||
| static bfloat16 f32tobfloat16(float value) { | |||
| blasint one = 1; | |||
| bfloat16 result; | |||
| sbstobf16_(&one, &value, &one, &result, &one); | |||
| return result; | |||
| } | |||
| #endif | |||
| #include "bf16_macros.h" | |||
| #ifdef BGEMM | |||
| #define ALPHA bfloat16tof32(alpha) | |||
| #define BF16TOF32(x) (bfloat16tof32(x)) | |||
| #define F32TOBF16(x) (f32tobfloat16(x)) | |||
| #else | |||
| #define ALPHA alpha | |||
| #define BF16TOF32(x) (bfloat16tof32(x)) | |||
| #define F32TOBF16(x) x | |||
| #endif | |||
| #else | |||
| #define ALPHA alpha | |||
| #define BF16TOF32(x) x | |||
| #define F32TOBF16(x) x | |||
| #endif | |||
| int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,IFLOAT* ba,IFLOAT* bb,FLOAT* C,BLASLONG ldc | |||
| #ifdef TRMMKERNEL | |||
| ,BLASLONG offset | |||
| @@ -0,0 +1,70 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013-2014, 2025 The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include "bf16_macros.h" | |||
| int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y) | |||
| { | |||
| BLASLONG i; | |||
| BLASLONG ix, iy; | |||
| BLASLONG j; | |||
| FLOAT *a_ptr; | |||
| #ifdef BGEMM | |||
| float temp; | |||
| #else | |||
| FLOAT temp; | |||
| #endif | |||
| iy = 0; | |||
| for (BLASLONG i = 0; i < m; i++) | |||
| { | |||
| temp = 0.0; | |||
| ix = 0; | |||
| a_ptr = a; | |||
| for (BLASLONG j = 0; j < n; j++) | |||
| { | |||
| temp += BF16TOF32(a_ptr[i]) * BF16TOF32(x[ix]); | |||
| ix += inc_x; | |||
| a_ptr += lda; | |||
| } | |||
| if (BETA == ZERO) | |||
| { | |||
| y[iy] = F32TOBF16(ALPHA * temp); | |||
| } | |||
| else | |||
| { | |||
| y[iy] = F32TOBF16(ALPHA * temp + BETA * BF16TOF32(y[iy])); | |||
| } | |||
| iy += inc_y; | |||
| } | |||
| return (0); | |||
| } | |||
| @@ -0,0 +1,60 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, 2025 The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include "bf16_macros.h" | |||
| int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y) | |||
| { | |||
| BLASLONG i; | |||
| BLASLONG ix, iy; | |||
| BLASLONG j; | |||
| FLOAT *a_ptr; | |||
| #ifdef BGEMM | |||
| float temp; | |||
| #else | |||
| FLOAT temp; | |||
| #endif | |||
| iy = 0; | |||
| a_ptr = a; | |||
| for (j = 0; j < n; j++) | |||
| { | |||
| temp = 0.0; | |||
| ix = 0; | |||
| for (i = 0; i < m; i++) | |||
| { | |||
| temp += BF16TOF32(a_ptr[i]) * BF16TOF32(x[ix]); | |||
| ix += inc_x; | |||
| } | |||
| y[iy] += F32TOBF16(ALPHA * temp); | |||
| iy += inc_y; | |||
| a_ptr += lda; | |||
| } | |||
| return (0); | |||
| } | |||
| @@ -0,0 +1,106 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, 2025 The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) | |||
| { | |||
| BLASLONG i = 0, j = 0; | |||
| #if defined(BFLOAT16) | |||
| float x_float, da_float; | |||
| SBF16TOS_K(1, &da, 1, &da_float, 1); | |||
| #else | |||
| float x_float; | |||
| float da_float = da; | |||
| #endif | |||
| if ((n <= 0) || (inc_x <= 0)) | |||
| return (0); | |||
| if (dummy2 == 0) | |||
| { | |||
| while (j < n) | |||
| { | |||
| if (da_float == 0.0) | |||
| x_float = 0.0; | |||
| else | |||
| { | |||
| #if defined(BFLOAT16) | |||
| SBF16TOS_K(1, &x[i], 1, &x_float, 1); | |||
| #else | |||
| float x_float = x[i]; | |||
| #endif | |||
| x_float = da_float * x_float; | |||
| } | |||
| #if defined(BFLOAT16) | |||
| SBSTOBF16_K(1, &x_float, 1, &x[i], 1); | |||
| #else | |||
| x[i] = x_float; | |||
| #endif | |||
| i += inc_x; | |||
| j++; | |||
| } | |||
| } | |||
| else | |||
| { | |||
| while (j < n) | |||
| { | |||
| #if defined(BFLOAT16) | |||
| SBF16TOS_K(1, &x[i], 1, &x_float, 1); | |||
| #else | |||
| float x_float = x[i]; | |||
| #endif | |||
| if (da == 0.0) | |||
| if (!isnan(x_float) && !isinf(x_float)) | |||
| { | |||
| x_float = 0.0; | |||
| } | |||
| else | |||
| { | |||
| x_float = NAN; | |||
| } | |||
| else | |||
| { | |||
| x_float = da_float * x_float; | |||
| } | |||
| #if defined(BFLOAT16) | |||
| SBSTOBF16_K(1, &x_float, 1, &x[i], 1); | |||
| #else | |||
| x[i] = x_float; | |||
| #endif | |||
| i += inc_x; | |||
| j++; | |||
| } | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -83,8 +83,8 @@ gotoblas_t TABLE_NAME = { | |||
| isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS, | |||
| snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS, | |||
| dsdot_kTS, | |||
| srot_kTS, srotm_kTS, saxpy_kTS, sscal_kTS, sswap_kTS, | |||
| sbgemv_nTS, sbgemv_tTS, sger_kTS, | |||
| srot_kTS, srotm_kTS, bscal_kTS, saxpy_kTS, sscal_kTS, sswap_kTS, | |||
| bgemv_nTS, bgemv_tTS, sbgemv_nTS, sbgemv_tTS, sger_kTS, | |||
| ssymv_LTS, ssymv_UTS, | |||
| bgemm_kernelTS, bgemm_betaTS, | |||
| @@ -119,6 +119,10 @@ endif | |||
| endif | |||
| endif | |||
| ifeq ($(BUILD_BFLOAT16), 1) | |||
| BB2 = test_bgemv | |||
| B2 = test_sbgemv | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| S2=sblat2 | |||
| endif | |||
| @@ -132,11 +136,17 @@ ifeq ($(BUILD_COMPLEX16),1) | |||
| Z2=zblat2 | |||
| endif | |||
| level2: $(S2) $(D2) $(C2) $(Z2) | |||
| level2: $(BB2) $(B2) $(S2) $(D2) $(C2) $(Z2) | |||
| ifneq ($(CROSS), 1) | |||
| rm -f ?BLAT2.SUMM | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_bgemv > BBLAT2.SUMM | |||
| @$(GREP) -q FATAL BBLAT2.SUMM && cat BBLAT2.SUMM || exit 0 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_sbgemv > SBBLAT2.SUMM | |||
| @$(GREP) -q FATAL SBBLAT2.SUMM && cat SBBLAT2.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat2 < ./sblat2.dat | |||
| @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 | |||
| @@ -156,6 +166,12 @@ endif | |||
| ifdef SMP | |||
| rm -f ?BLAT2.SUMM | |||
| ifeq ($(USE_OPENMP), 1) | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| OMP_NUM_THREADS=2 ./test_bgemv > BBLAT2.SUMM | |||
| @$(GREP) -q FATAL BBLAT2.SUMM && cat BBLAT2.SUMM || exit 0 | |||
| OMP_NUM_THREADS=2 ./test_sbgemv > SBBLAT2.SUMM | |||
| @$(GREP) -q FATAL SBBLAT2.SUMM && cat SBBLAT2.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OMP_NUM_THREADS=2 ./sblat2 < ./sblat2.dat | |||
| @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 | |||
| @@ -173,6 +189,12 @@ ifeq ($(BUILD_COMPLEX16),1) | |||
| @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 | |||
| endif | |||
| else | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| OMP_NUM_THREADS=2 ./test_bgemv > BBLAT2.SUMM | |||
| @$(GREP) -q FATAL BBLAT2.SUMM && cat BBLAT2.SUMM || exit 0 | |||
| OMP_NUM_THREADS=2 ./test_sbgemv > SBBLAT2.SUMM | |||
| @$(GREP) -q FATAL SBBLAT2.SUMM && cat SBBLAT2.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OPENBLAS_NUM_THREADS=2 ./sblat2 < ./sblat2.dat | |||
| @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 | |||
| @@ -195,7 +217,7 @@ endif | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| BF3= test_bgemm | |||
| B3= test_sbgemm | |||
| B3 = test_sbgemm | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| S3=sblat3 | |||
| @@ -404,10 +426,16 @@ endif | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| test_bgemm : compare_sgemm_bgemm.c test_helpers.h ../$(LIBNAME) | |||
| $(CC) $(CLDFLAGS) -o test_bgemm compare_sgemm_bgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| $(CC) $(CLDFLAGS) -DIBFLOAT16 -DOBFLOAT16 -o test_bgemm compare_sgemm_bgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| test_bgemv : compare_sgemv_bgemv.c ../$(LIBNAME) | |||
| $(CC) $(CLDFLAGS) -DIBFLOAT16 -DOBFLOAT16 -o test_bgemv compare_sgemv_bgemv.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| test_sbgemm : compare_sgemm_sbgemm.c test_helpers.h ../$(LIBNAME) | |||
| $(CC) $(CLDFLAGS) -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| $(CC) $(CLDFLAGS) -DIBFLOAT16 -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| test_sbgemv : compare_sgemv_sbgemv.c ../$(LIBNAME) | |||
| $(CC) $(CLDFLAGS) -DIBFLOAT16 -o test_sbgemv compare_sgemv_sbgemv.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| @@ -426,7 +454,7 @@ clean: | |||
| @rm -f *.$(SUFFIX) *.$(PSUFFIX) gmon.$(SUFFIX)ut *.SUMM *.cxml *.exe *.pdb *.dwf \ | |||
| sblat1 dblat1 cblat1 zblat1 \ | |||
| sblat2 dblat2 cblat2 zblat2 \ | |||
| test_bgemm test_sbgemm sblat3 dblat3 cblat3 zblat3 \ | |||
| test_bgemm test_bgemv test_sbgemm test_sbgemv sblat3 dblat3 cblat3 zblat3 \ | |||
| sblat1p dblat1p cblat1p zblat1p \ | |||
| sblat2p dblat2p cblat2p zblat2p \ | |||
| sblat3p dblat3p cblat3p zblat3p \ | |||
| @@ -34,15 +34,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define BGEMM BLASFUNC(bgemm) | |||
| #define BGEMM_LARGEST 256 | |||
| static float truncate_float32_to_bfloat16(float value) { | |||
| blasint one = 1; | |||
| bfloat16 tmp; | |||
| float result; | |||
| sbstobf16_(&one, &value, &one, &tmp, &one); | |||
| sbf16tos_(&one, &tmp, &one, &result, &one); | |||
| return result; | |||
| } | |||
| int | |||
| main (int argc, char *argv[]) | |||
| { | |||
| @@ -158,6 +149,7 @@ main (int argc, char *argv[]) | |||
| if (ret != 0) { | |||
| fprintf (stderr, "FATAL ERROR BGEMM - Return code: %d\n", ret); | |||
| return ret; | |||
| } | |||
| return ret; | |||
| } | |||
| @@ -141,87 +141,7 @@ main (int argc, char *argv[]) | |||
| if (ret != 0) { | |||
| fprintf (stderr, "FATAL ERROR SBGEMM - Return code: %d\n", ret); | |||
| return ret; | |||
| } | |||
| for (beta = 0; beta < 3; beta += 1) { | |||
| for (alpha = 0; alpha < 3; alpha += 1) { | |||
| for (l = 0; l < 2; l++) { // l = 1 to test inc_x & inc_y not equal to one. | |||
| for (x = 1; x <= loop; x++) | |||
| { | |||
| k = (x == 0) ? 0 : l + 1; | |||
| float *A = (float *)malloc_safe(x * x * sizeof(FLOAT)); | |||
| float *B = (float *)malloc_safe(x * sizeof(FLOAT) << l); | |||
| float *C = (float *)malloc_safe(x * sizeof(FLOAT) << l); | |||
| bfloat16 *AA = (bfloat16 *)malloc_safe(x * x * sizeof(bfloat16)); | |||
| bfloat16 *BB = (bfloat16 *)malloc_safe(x * sizeof(bfloat16) << l); | |||
| float *DD = (float *)malloc_safe(x * sizeof(FLOAT)); | |||
| float *CC = (float *)malloc_safe(x * sizeof(FLOAT) << l); | |||
| if ((A == NULL) || (B == NULL) || (C == NULL) || (AA == NULL) || (BB == NULL) || | |||
| (DD == NULL) || (CC == NULL)) | |||
| return 1; | |||
| blasint one = 1; | |||
| for (j = 0; j < x; j++) | |||
| { | |||
| for (i = 0; i < x; i++) | |||
| { | |||
| A[j * x + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5; | |||
| sbstobf16_(&one, &A[j*x+i], &one, &AA[j * x + i], &one); | |||
| } | |||
| B[j << l] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5; | |||
| sbstobf16_(&one, &B[j << l], &one, &BB[j << l], &one); | |||
| CC[j << l] = C[j << l] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5; | |||
| } | |||
| for (y = 0; y < 2; y++) | |||
| { | |||
| if (y == 0) { | |||
| transA = 'N'; | |||
| } else { | |||
| transA = 'T'; | |||
| } | |||
| memset(CC, 0, x * sizeof(FLOAT) << l); | |||
| memset(DD, 0, x * sizeof(FLOAT)); | |||
| memset(C, 0, x * sizeof(FLOAT) << l); | |||
| SGEMV (&transA, &x, &x, &alpha, A, &x, B, &k, &beta, C, &k); | |||
| SBGEMV (&transA, &x, &x, &alpha, (bfloat16*) AA, &x, (bfloat16*) BB, &k, &beta, CC, &k); | |||
| for (int i = 0; i < x; i ++) DD[i] *= beta; | |||
| for (j = 0; j < x; j++) | |||
| for (i = 0; i < x; i++) | |||
| if (transA == 'N') { | |||
| DD[i] += alpha * float16to32 (AA[j * x + i]) * float16to32 (BB[j << l]); | |||
| } else if (transA == 'T') { | |||
| DD[j] += alpha * float16to32 (AA[j * x + i]) * float16to32 (BB[i << l]); | |||
| } | |||
| for (j = 0; j < x; j++) { | |||
| if (!is_close(CC[j << l], C[j << l], 0.01, 0.001)) { | |||
| ret++; | |||
| } | |||
| if (!is_close(CC[j << l], DD[j], 0.001, 0.0001)) { | |||
| ret++; | |||
| } | |||
| } | |||
| } | |||
| free(A); | |||
| free(B); | |||
| free(C); | |||
| free(AA); | |||
| free(BB); | |||
| free(DD); | |||
| free(CC); | |||
| } // x | |||
| } // l | |||
| } // alpha | |||
| } // beta | |||
| if (ret != 0) | |||
| fprintf (stderr, "FATAL ERROR SBGEMV - Return code: %d\n", ret); | |||
| return ret; | |||
| } | |||
| @@ -0,0 +1,149 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020,2025 The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include <stdio.h> | |||
| #include <stdint.h> | |||
| #include "../common.h" | |||
| #include "test_helpers.h" | |||
| #define SGEMV BLASFUNC(sgemv) | |||
| #define BGEMV BLASFUNC(bgemv) | |||
| #define BGEMV_LARGEST 256 | |||
| int main(int argc, char *argv[]) | |||
| { | |||
| blasint k; | |||
| int i, j, l; | |||
| blasint x, y; | |||
| blasint one = 1; | |||
| int ret = 0; | |||
| int loop = BGEMV_LARGEST; | |||
| char transA = 'N'; | |||
| float alpha = 1.0, beta = 0.0; | |||
| bfloat16 alpha_bf16, beta_bf16; | |||
| for (beta = 0; beta < 3; beta += 1) | |||
| { | |||
| for (alpha = 0; alpha < 3; alpha += 1) | |||
| { | |||
| for (l = 0; l < 2; l++) | |||
| { // l = 1 to test inc_x & inc_y not equal to one. | |||
| for (x = 1; x <= loop; x++) | |||
| { | |||
| k = (x == 0) ? 0 : l + 1; | |||
| float *A = (float *)malloc_safe(x * x * sizeof(FLOAT)); | |||
| float *B = (float *)malloc_safe(x * sizeof(FLOAT) << l); | |||
| float *C = (float *)malloc_safe(x * sizeof(FLOAT) << l); | |||
| bfloat16 *AA = (bfloat16 *)malloc_safe(x * x * sizeof(bfloat16)); | |||
| bfloat16 *BB = (bfloat16 *)malloc_safe(x * sizeof(bfloat16) << l); | |||
| bfloat16 *CC = (bfloat16 *)malloc_safe(x * sizeof(bfloat16) << l); | |||
| float *DD = (float *)malloc_safe(x * sizeof(FLOAT)); | |||
| if ((A == NULL) || (B == NULL) || (C == NULL) || (AA == NULL) || (BB == NULL) || | |||
| (CC == NULL) || (DD == NULL)) | |||
| return 1; | |||
| for (j = 0; j < x; j++) | |||
| { | |||
| for (i = 0; i < x; i++) | |||
| { | |||
| A[j * x + i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) + 0.5; | |||
| sbstobf16_(&one, &A[j * x + i], &one, &AA[j * x + i], &one); | |||
| } | |||
| B[j << l] = ((FLOAT)rand() / (FLOAT)RAND_MAX) + 0.5; | |||
| sbstobf16_(&one, &B[j << l], &one, &BB[j << l], &one); | |||
| C[j << l] = ((FLOAT)rand() / (FLOAT)RAND_MAX) + 0.5; | |||
| sbstobf16_(&one, &B[j << l], &one, &CC[j << l], &one); | |||
| } | |||
| for (y = 0; y < 2; y++) | |||
| { | |||
| if (y == 0) | |||
| { | |||
| transA = 'N'; | |||
| } | |||
| else | |||
| { | |||
| transA = 'T'; | |||
| } | |||
| memset(C, 0, x * sizeof(FLOAT) << l); | |||
| memset(CC, 0, x * sizeof(bfloat16) << l); | |||
| memset(DD, 0, x * sizeof(FLOAT)); | |||
| sbstobf16_(&one, &alpha, &one, &alpha_bf16, &one); | |||
| sbstobf16_(&one, &beta, &one, &beta_bf16, &one); | |||
| SGEMV(&transA, &x, &x, &alpha, A, &x, B, &k, &beta, C, &k); | |||
| BGEMV(&transA, &x, &x, &alpha_bf16, AA, &x, BB, &k, &beta_bf16, CC, &k); | |||
| for (int i = 0; i < x; i++) | |||
| DD[i] *= beta; | |||
| for (j = 0; j < x; j++) | |||
| for (i = 0; i < x; i++) | |||
| if (transA == 'N') | |||
| { | |||
| DD[i] += alpha * float16to32(AA[j * x + i]) * float16to32(BB[j << l]); | |||
| } | |||
| else if (transA == 'T') | |||
| { | |||
| DD[j] += alpha * float16to32(AA[j * x + i]) * float16to32(BB[i << l]); | |||
| } | |||
| for (j = 0; j < x; j++) | |||
| { | |||
| if (!is_close(float16to32(CC[j << l]), truncate_float32_to_bfloat16(C[j << l]), 0.01, 0.001)) | |||
| { | |||
| printf("Mismatch at trans=%c, alpha=%.2f, beta=%.2f, i=%d, j=%d, k=%d: CC=%.6f, C=%.6f\n", | |||
| transA, alpha, beta, i, j, k, float16to32(CC[j << l]), truncate_float32_to_bfloat16(C[j << l])); | |||
| ret++; | |||
| } | |||
| if (!is_close(float16to32(CC[j << l]), truncate_float32_to_bfloat16(DD[j]), 0.001, 0.0001)) | |||
| { | |||
| printf("Mismatch at trans=%c, alpha=%.2f, beta=%.2f, i=%d, j=%d, k=%d: CC=%.6f, C=%.6f\n", | |||
| transA, alpha, beta, i, j, k, float16to32(CC[j << l]), truncate_float32_to_bfloat16(DD[j])); | |||
| ret++; | |||
| } | |||
| } | |||
| } | |||
| free(A); | |||
| free(B); | |||
| free(C); | |||
| free(AA); | |||
| free(BB); | |||
| free(CC); | |||
| free(DD); | |||
| } // x | |||
| } // l | |||
| } // alpha | |||
| } // beta | |||
| if (ret != 0) | |||
| fprintf(stderr, "FATAL ERROR BGEMV - Return code: %d\n", ret); | |||
| return ret; | |||
| } | |||
| @@ -0,0 +1,128 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020,2025 The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include <stdio.h> | |||
| #include <stdint.h> | |||
| #include "../common.h" | |||
| #include "test_helpers.h" | |||
| #define SGEMV BLASFUNC(sgemv) | |||
| #define SBGEMV BLASFUNC(sbgemv) | |||
| #define SBGEMV_LARGEST 256 | |||
| int | |||
| main (int argc, char *argv[]) | |||
| { | |||
| blasint k; | |||
| int i, j, l; | |||
| blasint x, y; | |||
| int ret = 0; | |||
| int loop = SBGEMV_LARGEST; | |||
| char transA = 'N'; | |||
| float alpha = 1.0, beta = 0.0; | |||
| for (beta = 0; beta < 3; beta += 1) { | |||
| for (alpha = 0; alpha < 3; alpha += 1) { | |||
| for (l = 0; l < 2; l++) { // l = 1 to test inc_x & inc_y not equal to one. | |||
| for (x = 1; x <= loop; x++) | |||
| { | |||
| k = (x == 0) ? 0 : l + 1; | |||
| float *A = (float *)malloc_safe(x * x * sizeof(FLOAT)); | |||
| float *B = (float *)malloc_safe(x * sizeof(FLOAT) << l); | |||
| float *C = (float *)malloc_safe(x * sizeof(FLOAT) << l); | |||
| bfloat16 *AA = (bfloat16 *)malloc_safe(x * x * sizeof(bfloat16)); | |||
| bfloat16 *BB = (bfloat16 *)malloc_safe(x * sizeof(bfloat16) << l); | |||
| float *CC = (float *)malloc_safe(x * sizeof(FLOAT) << l); | |||
| float *DD = (float *)malloc_safe(x * sizeof(FLOAT)); | |||
| if ((A == NULL) || (B == NULL) || (C == NULL) || (AA == NULL) || (BB == NULL) || | |||
| (DD == NULL) || (CC == NULL)) | |||
| return 1; | |||
| blasint one = 1; | |||
| for (j = 0; j < x; j++) | |||
| { | |||
| for (i = 0; i < x; i++) | |||
| { | |||
| A[j * x + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5; | |||
| sbstobf16_(&one, &A[j*x+i], &one, &AA[j * x + i], &one); | |||
| } | |||
| B[j << l] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5; | |||
| sbstobf16_(&one, &B[j << l], &one, &BB[j << l], &one); | |||
| CC[j << l] = C[j << l] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5; | |||
| } | |||
| for (y = 0; y < 2; y++) | |||
| { | |||
| if (y == 0) { | |||
| transA = 'N'; | |||
| } else { | |||
| transA = 'T'; | |||
| } | |||
| memset(CC, 0, x * sizeof(FLOAT) << l); | |||
| memset(DD, 0, x * sizeof(FLOAT)); | |||
| memset(C, 0, x * sizeof(FLOAT) << l); | |||
| SGEMV (&transA, &x, &x, &alpha, A, &x, B, &k, &beta, C, &k); | |||
| SBGEMV (&transA, &x, &x, &alpha, (bfloat16*) AA, &x, (bfloat16*) BB, &k, &beta, CC, &k); | |||
| for (int i = 0; i < x; i ++) DD[i] *= beta; | |||
| for (j = 0; j < x; j++) | |||
| for (i = 0; i < x; i++) | |||
| if (transA == 'N') { | |||
| DD[i] += alpha * float16to32 (AA[j * x + i]) * float16to32 (BB[j << l]); | |||
| } else if (transA == 'T') { | |||
| DD[j] += alpha * float16to32 (AA[j * x + i]) * float16to32 (BB[i << l]); | |||
| } | |||
| for (j = 0; j < x; j++) { | |||
| if (!is_close(CC[j << l], C[j << l], 0.01, 0.001)) { | |||
| ret++; | |||
| } | |||
| if (!is_close(CC[j << l], DD[j], 0.001, 0.0001)) { | |||
| ret++; | |||
| } | |||
| } | |||
| } | |||
| free(A); | |||
| free(B); | |||
| free(C); | |||
| free(AA); | |||
| free(BB); | |||
| free(DD); | |||
| free(CC); | |||
| } // x | |||
| } // l | |||
| } // alpha | |||
| } // beta | |||
| if (ret != 0) | |||
| fprintf (stderr, "FATAL ERROR SBGEMV - Return code: %d\n", ret); | |||
| return ret; | |||
| } | |||
| @@ -31,7 +31,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include "../common.h" | |||
| #if IFLOAT == bfloat16 | |||
| #ifdef IBFLOAT16 | |||
| static float float16to32(bfloat16 value) | |||
| { | |||
| blasint one = 1; | |||
| @@ -41,6 +41,17 @@ static float float16to32(bfloat16 value) | |||
| } | |||
| #endif | |||
| #ifdef OBFLOAT16 | |||
| static float truncate_float32_to_bfloat16(float value) { | |||
| blasint one = 1; | |||
| bfloat16 tmp; | |||
| float result; | |||
| sbstobf16_(&one, &value, &one, &tmp, &one); | |||
| sbf16tos_(&one, &tmp, &one, &result, &one); | |||
| return result; | |||
| } | |||
| #endif | |||
| static void *malloc_safe(size_t size) { | |||
| if (size == 0) | |||
| return malloc(1); | |||