ARM64: Enable bfloat16 kernels by default (pull/5406/head)
| @@ -87,10 +87,16 @@ jobs: | |||||
| echo "max_size = 300M" > ~/.ccache/ccache.conf | echo "max_size = 300M" > ~/.ccache/ccache.conf | ||||
| echo "compression = true" >> ~/.ccache/ccache.conf | echo "compression = true" >> ~/.ccache/ccache.conf | ||||
| ccache -s | ccache -s | ||||
| - name: Add gfortran runtime to link path | |||||
| if: matrix.build == 'make' && runner.os == 'macOS' | |||||
| run: | | |||||
| GFORTRAN_LIBDIR=$(gfortran -print-file-name=libgfortran.dylib | xargs dirname) | |||||
| echo "Using gfortran runtime in $GFORTRAN_LIBDIR" | |||||
| echo "LDFLAGS=-L/opt/homebrew/opt/llvm/lib -L$GFORTRAN_LIBDIR" >> $GITHUB_ENV | |||||
| - name: Build OpenBLAS | - name: Build OpenBLAS | ||||
| run: | | run: | | ||||
| export LDFLAGS="-L/opt/homebrew/opt/llvm/lib" | |||||
| export CPPFLAGS="-I/opt/homebrew/opt/llvm/include" | export CPPFLAGS="-I/opt/homebrew/opt/llvm/include" | ||||
| export CC="/opt/homebrew/opt/llvm/bin/clang" | export CC="/opt/homebrew/opt/llvm/bin/clang" | ||||
| case "${{ matrix.build }}" in | case "${{ matrix.build }}" in | ||||
| @@ -89,6 +89,14 @@ jobs: | |||||
| echo "max_size = 300M" > ~/.ccache/ccache.conf | echo "max_size = 300M" > ~/.ccache/ccache.conf | ||||
| echo "compression = true" >> ~/.ccache/ccache.conf | echo "compression = true" >> ~/.ccache/ccache.conf | ||||
| ccache -s | ccache -s | ||||
| - name: Add gfortran runtime to link path | |||||
| if: matrix.build == 'make' && runner.os == 'macOS' | |||||
| run: | | |||||
| GFORTRAN_LIBDIR=$(gfortran -print-file-name=libgfortran.dylib | xargs dirname) | |||||
| echo "Using gfortran runtime in $GFORTRAN_LIBDIR" | |||||
| # Preserve whatever LDFLAGS may already contain | |||||
| echo "LDFLAGS=${LDFLAGS:+$LDFLAGS }-L$GFORTRAN_LIBDIR" >> "$GITHUB_ENV" | |||||
| - name: Build OpenBLAS | - name: Build OpenBLAS | ||||
| run: | | run: | | ||||
| @@ -255,6 +255,7 @@ In chronological order: | |||||
| * Abhishek Kumar <https://github.com/abhishek-iitmadras> | * Abhishek Kumar <https://github.com/abhishek-iitmadras> | ||||
| * [2025-04-22] Optimise dot kernel for NEOVERSE V1 | * [2025-04-22] Optimise dot kernel for NEOVERSE V1 | ||||
| * [2025-07-23] ARM64-Enable bfloat16 kernels by default | |||||
| * Sharif Inamdar <sharif.inamdar@arm.com> | * Sharif Inamdar <sharif.inamdar@arm.com> | ||||
| * [2025-06-05] Optimize gemv_n_sve_v1x3 kernel | * [2025-06-05] Optimize gemv_n_sve_v1x3 kernel | ||||
| @@ -270,6 +270,7 @@ SMALL_MATRIX_OPT = 1 | |||||
| BUILD_BFLOAT16 = 1 | BUILD_BFLOAT16 = 1 | ||||
| else ifeq ($(ARCH), arm64) | else ifeq ($(ARCH), arm64) | ||||
| SMALL_MATRIX_OPT = 1 | SMALL_MATRIX_OPT = 1 | ||||
| BUILD_BFLOAT16 = 1 | |||||
| endif | endif | ||||
| ifeq ($(ARCH), loongarch64) | ifeq ($(ARCH), loongarch64) | ||||
| SMALL_MATRIX_OPT = 1 | SMALL_MATRIX_OPT = 1 | ||||
| @@ -425,10 +426,8 @@ ifeq ($(OSNAME), Darwin) | |||||
| ifndef MACOSX_DEPLOYMENT_TARGET | ifndef MACOSX_DEPLOYMENT_TARGET | ||||
| ifeq ($(ARCH), arm64) | ifeq ($(ARCH), arm64) | ||||
| export MACOSX_DEPLOYMENT_TARGET=11.0 | export MACOSX_DEPLOYMENT_TARGET=11.0 | ||||
| ifeq ($(C_COMPILER), GCC) | |||||
| export NO_SVE = 1 | export NO_SVE = 1 | ||||
| export NO_SME = 1 | export NO_SME = 1 | ||||
| endif | |||||
| else | else | ||||
| export MACOSX_DEPLOYMENT_TARGET=10.8 | export MACOSX_DEPLOYMENT_TARGET=10.8 | ||||
| endif | endif | ||||
| @@ -27,6 +27,7 @@ | |||||
| * *****************************************************************************/ | * *****************************************************************************/ | ||||
| #include <arm_sve.h> | #include <arm_sve.h> | ||||
| #include <arm_neon.h> | |||||
| #include "common.h" | #include "common.h" | ||||
| @@ -27,6 +27,7 @@ | |||||
| * *****************************************************************************/ | * *****************************************************************************/ | ||||
| #include <arm_sve.h> | #include <arm_sve.h> | ||||
| #include <arm_neon.h> | |||||
| #include "common.h" | #include "common.h" | ||||
| @@ -28,6 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #include <arm_sve.h> | #include <arm_sve.h> | ||||
| #include <arm_neon.h> | |||||
| #define UPDATE_PTRSx2 \ | #define UPDATE_PTRSx2 \ | ||||
| a_ptr1 = a_ptr0 + lda; | a_ptr1 = a_ptr0 + lda; | ||||