Enable bfloat16 kernels by default on ARM64 and add the gfortran runtime
directory to the macOS linker search path in the CI workflows.

Review notes (free text before the first file header is ignored by patch tools):
  * apple_m.yml: "Build OpenBLAS" runs for every matrix.build value, while the
    new gfortran step only runs when matrix.build == 'make'. The LLVM library
    directory is therefore still exported inside the build step (prepended to
    whatever LDFLAGS the earlier step may have written to GITHUB_ENV), and the
    gfortran step only appends its own -L entry, matching dynamic_arch.yml.
  * NOTE(review): this hunk was edited by hand, so the apple_m.yml index hashes
    below no longer describe the resulting blob - regenerate if they matter.
  * NOTE(review): text inside angle brackets (the header names after #include in
    the kernel/arm64 hunks) is missing from the patch as received - restore it
    before applying.
  * NOTE(review): leading whitespace of context lines was reconstructed from a
    whitespace-collapsed copy - verify against the target files.

diff --git a/.github/workflows/apple_m.yml b/.github/workflows/apple_m.yml
index 81952dabd..56d46cf0c 100644
--- a/.github/workflows/apple_m.yml
+++ b/.github/workflows/apple_m.yml
@@ -87,10 +87,17 @@ jobs:
           echo "max_size = 300M" > ~/.ccache/ccache.conf
           echo "compression = true" >> ~/.ccache/ccache.conf
           ccache -s
+
+      - name: Add gfortran runtime to link path
+        if: matrix.build == 'make' && runner.os == 'macOS'
+        run: |
+          GFORTRAN_LIBDIR=$(gfortran -print-file-name=libgfortran.dylib | xargs dirname)
+          echo "Using gfortran runtime in $GFORTRAN_LIBDIR"
+          echo "LDFLAGS=${LDFLAGS:+$LDFLAGS }-L$GFORTRAN_LIBDIR" >> "$GITHUB_ENV"
 
       - name: Build OpenBLAS
         run: |
-          export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
+          export LDFLAGS="-L/opt/homebrew/opt/llvm/lib${LDFLAGS:+ $LDFLAGS}"
           export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
           export CC="/opt/homebrew/opt/llvm/bin/clang"
           case "${{ matrix.build }}" in
diff --git a/.github/workflows/dynamic_arch.yml b/.github/workflows/dynamic_arch.yml
index 2d5c7b612..950ab08ea 100644
--- a/.github/workflows/dynamic_arch.yml
+++ b/.github/workflows/dynamic_arch.yml
@@ -89,6 +89,14 @@ jobs:
           echo "max_size = 300M" > ~/.ccache/ccache.conf
           echo "compression = true" >> ~/.ccache/ccache.conf
           ccache -s
+
+      - name: Add gfortran runtime to link path
+        if: matrix.build == 'make' && runner.os == 'macOS'
+        run: |
+          GFORTRAN_LIBDIR=$(gfortran -print-file-name=libgfortran.dylib | xargs dirname)
+          echo "Using gfortran runtime in $GFORTRAN_LIBDIR"
+          # Preserve whatever LDFLAGS may already contain
+          echo "LDFLAGS=${LDFLAGS:+$LDFLAGS }-L$GFORTRAN_LIBDIR" >> "$GITHUB_ENV"
 
       - name: Build OpenBLAS
         run: |
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 57993889a..b678c017a 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -255,6 +255,7 @@ In chronological order:
 
 * Abhishek Kumar
   * [2025-04-22] Optimise dot kernel for NEOVERSE V1
+  * [2025-07-23] ARM64-Enable bfloat16 kernels by default
 
 * Sharif Inamdar
   * [2025-06-05] Optimize gemv_n_sve_v1x3 kernel
diff --git a/Makefile.system b/Makefile.system
index b214006b1..3f1c48d23 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -270,6 +270,7 @@ SMALL_MATRIX_OPT = 1
 BUILD_BFLOAT16 = 1
 else ifeq ($(ARCH), arm64)
 SMALL_MATRIX_OPT = 1
+BUILD_BFLOAT16 = 1
 endif
 ifeq ($(ARCH), loongarch64)
 SMALL_MATRIX_OPT = 1
@@ -425,10 +426,8 @@ ifeq ($(OSNAME), Darwin)
 ifndef MACOSX_DEPLOYMENT_TARGET
 ifeq ($(ARCH), arm64)
 export MACOSX_DEPLOYMENT_TARGET=11.0
-ifeq ($(C_COMPILER), GCC)
 export NO_SVE = 1
 export NO_SME = 1
-endif
 else
 export MACOSX_DEPLOYMENT_TARGET=10.8
 endif
diff --git a/kernel/arm64/bgemm_kernel_4x4_neoversev1.c b/kernel/arm64/bgemm_kernel_4x4_neoversev1.c
index 7af31bb2c..7067413ff 100644
--- a/kernel/arm64/bgemm_kernel_4x4_neoversev1.c
+++ b/kernel/arm64/bgemm_kernel_4x4_neoversev1.c
@@ -27,6 +27,7 @@
  * *****************************************************************************/
 
 #include
+#include
 
 #include "common.h"
 
diff --git a/kernel/arm64/bgemm_kernel_4x4_neoversev1_impl.c b/kernel/arm64/bgemm_kernel_4x4_neoversev1_impl.c
index 2477da9c0..1f49d6200 100644
--- a/kernel/arm64/bgemm_kernel_4x4_neoversev1_impl.c
+++ b/kernel/arm64/bgemm_kernel_4x4_neoversev1_impl.c
@@ -27,6 +27,7 @@
  * *****************************************************************************/
 
 #include
+#include
 
 #include "common.h"
 
diff --git a/kernel/arm64/bgemv_n_sve_v3x4.c b/kernel/arm64/bgemv_n_sve_v3x4.c
index 6347746d0..b5e5f76a2 100644
--- a/kernel/arm64/bgemv_n_sve_v3x4.c
+++ b/kernel/arm64/bgemv_n_sve_v3x4.c
@@ -28,6 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "common.h"
 #include
+#include
 
 #define UPDATE_PTRSx2 \
   a_ptr1 = a_ptr0 + lda;