name: Update Binaries

on:
  workflow_dispatch:
    inputs:
      llama_cpp_commit:
        description: 'Branch, tag, or commit to use for llama.cpp'
        required: true
        default: 'master'
  push:
    branches: [cron_job]
  #schedule:
  #  - cron: "22 22 * * 2"

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event.inputs.llama_cpp_commit }}
  cancel-in-progress: true

env:
  # Compiler defines common to all platforms
  COMMON_DEFINE: -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=ON -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON

jobs:
  compile-linux:
    name: Compile (Linux)
    strategy:
      fail-fast: true
      matrix:
        include:
          - build: 'noavx'
            defines: '-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
          - build: 'avx2'
            defines: ''
          - build: 'avx'
            defines: '-DLLAMA_AVX2=OFF'
          - build: 'avx512'
            defines: '-DLLAMA_AVX512=ON'
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v4
        with:
          repository: ggerganov/llama.cpp
          fetch-depth: 0
          ref: '${{ github.event.inputs.llama_cpp_commit }}'
      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
      - uses: actions/upload-artifact@v3
        with:
          path: ./build/libllama.so
          name: llama-bin-linux-${{ matrix.build }}-x64.so
      - name: Upload Llava
        uses: actions/upload-artifact@v3
        with:
          path: ./build/examples/llava/libllava_shared.so
          name: llava-bin-linux-${{ matrix.build }}-x64.so

  compile-windows:
    name: Compile (Windows)
    strategy:
      fail-fast: true
      matrix:
        include:
          - build: 'noavx'
            defines: '-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
          - build: 'avx2'
            defines: ''
          - build: 'avx'
            defines: '-DLLAMA_AVX2=OFF'
          - build: 'avx512'
            defines: '-DLLAMA_AVX512=ON -DLLAMA_AVX512_VBMI=ON -DLLAMA_AVX512_VNNI=ON'
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
        with:
          repository: ggerganov/llama.cpp
          fetch-depth: 0
          ref: '${{ github.event.inputs.llama_cpp_commit }}'
      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          path: .\build\bin\Release\llama.dll
          name: llama-bin-win-${{ matrix.build }}-x64.dll
      - name: Upload Llava
        uses: actions/upload-artifact@v3
        with:
          path: .\build\bin\Release\llava_shared.dll
          name: llava-bin-win-${{ matrix.build }}-x64.dll
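
  # The clblast job builds the OpenCL/CLBlast backend. On Windows it downloads the Khronos
  # OpenCL SDK and a prebuilt CLBlast release, then rewrites the vcpkg OpenCL path hard-coded
  # in CLBlast's *.cmake files so they point at the downloaded SDK; on Linux it relies on the
  # distro packages installed via apt.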
  compile-clblast:
    name: Compile (clblast) - ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ ubuntu-22.04, windows-latest ]
    env:
      OPENBLAS_VERSION: 0.3.23
      OPENCL_VERSION: 2023.04.17
      CLBLAST_VERSION: 1.6.0
      VULKAN_VERSION: 1.3.261.1
    runs-on: ${{ matrix.os }}
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          repository: ggerganov/llama.cpp
      - name: Download dependencies - Linux
        if: ${{ matrix.os == 'ubuntu-22.04' }}
        run: |
          sudo apt update
          sudo apt install libopencl-clang-dev libclblast-dev
      - name: Download dependencies - Windows
        id: get_opencl
        if: ${{ matrix.os == 'windows-latest' }}
        run: |
          curl.exe -o $env:RUNNER_TEMP/opencl.zip -L "https://github.com/KhronosGroup/OpenCL-SDK/releases/download/v${env:OPENCL_VERSION}/OpenCL-SDK-v${env:OPENCL_VERSION}-Win-x64.zip"
          mkdir $env:RUNNER_TEMP/opencl
          tar.exe -xvf $env:RUNNER_TEMP/opencl.zip --strip-components=1 -C $env:RUNNER_TEMP/opencl
          curl.exe -o $env:RUNNER_TEMP/clblast.7z -L "https://github.com/CNugteren/CLBlast/releases/download/${env:CLBLAST_VERSION}/CLBlast-${env:CLBLAST_VERSION}-windows-x64.7z"
          curl.exe -o $env:RUNNER_TEMP/CLBlast.LICENSE.txt -L "https://github.com/CNugteren/CLBlast/raw/${env:CLBLAST_VERSION}/LICENSE"
          7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/clblast.7z
          rename-item $env:RUNNER_TEMP/CLBlast-${env:CLBLAST_VERSION}-windows-x64 clblast
          foreach ($f in (gci -Recurse -Path "$env:RUNNER_TEMP/clblast" -Filter '*.cmake')) {
            $txt = Get-Content -Path $f -Raw
            $txt.Replace('C:/vcpkg/packages/opencl_x64-windows/', "$($env:RUNNER_TEMP.Replace('\','/'))/opencl/") | Set-Content -Path $f -Encoding UTF8
          }
      - name: Build
        id: cmake_build
        if: ${{ matrix.os == 'windows-latest' }}
        run: |
          mkdir build
          cd build
          cmake .. ${{ env.COMMON_DEFINE }} -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"
          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
          copy $env:RUNNER_TEMP/clblast/lib/clblast.dll .\bin\Release\clblast.dll
          # We should probably generate a sha256 sum in a file, and use that.
          # echo "78a8c98bcb2efe1a63318d901ab204d9ba96c3b29707b4ce0c4240bdcdc698d6 ./bin/Release/clblast.dll" >> tmp
          # sha256sum -c tmp || exit 255
          # rm tmp
          ls -R
      - name: Build
        if: ${{ matrix.os == 'ubuntu-22.04' }}
        run: |
          mkdir build
          cd build
          cmake .. ${{ env.COMMON_DEFINE }} -DLLAMA_CLBLAST=ON
          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
          # if we ever want to pull libclblast.so back into the packages, just uncomment this line, and the one below for the upload
          # cp $(ldconfig -p | grep libclblast.so | tail -n 1 | cut -d ' ' -f 4) ./
          ls -R
      - name: Upload artifacts (Windows)
        if: ${{ matrix.os == 'windows-latest' }}
        uses: actions/upload-artifact@v3
        with:
          path: |
            .\build\bin\Release\llama.dll
            .\build\bin\Release\clblast.dll
          name: llama-bin-win-clblast-x64.dll
      - name: Upload llava artifacts (Windows)
        if: ${{ matrix.os == 'windows-latest' }}
        uses: actions/upload-artifact@v3
        with:
          path: |
            .\build\bin\Release\llava_shared.dll
          name: llava-bin-win-clblast-x64.dll
      - name: Upload artifacts (linux)
        if: ${{ matrix.os == 'ubuntu-22.04' }}
        uses: actions/upload-artifact@v3
        with:
          path: |
            ./build/libllama.so
            # ./build/libclblast.so
          name: llama-bin-linux-clblast-x64.so
      - name: Upload llava artifacts (linux)
        if: ${{ matrix.os == 'ubuntu-22.04' }}
        uses: actions/upload-artifact@v3
        with:
          path: |
            ./build/examples/llava/libllava_shared.so
          name: llava-bin-linux-clblast-x64.so
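
  # The cublas job builds the CUDA backend against each CUDA version in the matrix, installing
  # the toolkit on the runner via Jimver/cuda-toolkit and embedding the CUDA version in the
  # uploaded artifact names.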
  compile-cublas:
    name: Compile (cublas)
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-20.04, windows-latest]
        cuda: ['12.1.0', '11.7.1']
    runs-on: ${{ matrix.os }}
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          repository: ggerganov/llama.cpp
          fetch-depth: 0
          ref: '${{ github.event.inputs.llama_cpp_commit }}'
      - uses: Jimver/cuda-toolkit@v0.2.14
        if: runner.os == 'Windows'
        id: cuda-toolkit-windows
        with:
          cuda: ${{ matrix.cuda }}
          method: 'network'
          sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
      - uses: Jimver/cuda-toolkit@v0.2.14
        if: runner.os == 'Linux'
        id: cuda-toolkit-linux
        with:
          cuda: ${{ matrix.cuda }}
          method: 'network'
          linux-local-args: '["--toolkit"]'
      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake .. ${{ env.COMMON_DEFINE }} -DLLAMA_CUBLAS=ON
          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
          ls -R
      - name: Upload artifacts (Windows)
        if: ${{ matrix.os == 'windows-latest' }}
        uses: actions/upload-artifact@v3
        with:
          path: .\build\bin\Release\llama.dll
          name: llama-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
      - name: Upload llava artifacts (Windows)
        if: ${{ matrix.os == 'windows-latest' }}
        uses: actions/upload-artifact@v3
        with:
          path: .\build\bin\Release\llava_shared.dll
          name: llava-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
      - name: Upload artifacts (Linux)
        if: ${{ matrix.os == 'ubuntu-20.04' }}
        uses: actions/upload-artifact@v3
        with:
          path: ./build/libllama.so
          name: llama-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
      - name: Upload llava artifacts (Linux)
        if: ${{ matrix.os == 'ubuntu-20.04' }}
        uses: actions/upload-artifact@v3
        with:
          path: ./build/examples/llava/libllava_shared.so
          name: llava-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so

  compile-macos:
    name: Compile (MacOS)
    strategy:
      fail-fast: true
      matrix:
        include:
          - build: 'arm64'
            defines: '-DCMAKE_OSX_ARCHITECTURES=arm64'
          - build: 'x64'
            defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_METAL=OFF -DLLAMA_AVX=ON -DLLAMA_AVX2=ON'
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v4
        with:
          repository: ggerganov/llama.cpp
          fetch-depth: 0
          ref: '${{ github.event.inputs.llama_cpp_commit }}'
      - name: Dependencies
        continue-on-error: true
        run: |
          brew update
      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          path: ./build/libllama.dylib
          name: llama-bin-osx-${{ matrix.build }}.dylib
      - name: Upload Llava
        uses: actions/upload-artifact@v3
        with:
          path: ./build/examples/llava/libllava_shared.dylib
          name: llava-bin-osx-${{ matrix.build }}.dylib
      - name: Upload Metal
        if: ${{ matrix.build != 'x64' }}
        uses: actions/upload-artifact@v3
        with:
          path: ./build/bin/ggml-metal.metal
          name: ggml-metal.metal
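
  # build-deps waits for all compile jobs, downloads every artifact they uploaded, and
  # rearranges them into a single "deps" artifact: noavx binaries at the root, with
  # avx/avx2/avx512, osx-arm64/osx-x64, cu11.7.1/cu12.1.0 and clblast subfolders
  # (presumably the layout consumed by the downstream packaging step).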
  build-deps:
    runs-on: ubuntu-latest
    name: "Gather Binaries"
    if: ${{ always() }}
    needs: [
      "compile-linux",
      "compile-macos",
      "compile-windows",
      "compile-cublas",
      "compile-clblast"
    ]
    steps:
      - uses: actions/download-artifact@v4
        with:
          path: artifacts

      - name: List Files
        run: ls -R

      - name: Rearrange Files
        run: |
          # Make all directories at once
          mkdir --parents deps/{avx,avx2,avx512,osx-arm64,osx-x64,cu11.7.1,cu12.1.0,clblast}

          cp artifacts/llama-bin-linux-noavx-x64.so/libllama.so  deps/libllama.so
          cp artifacts/llama-bin-linux-avx-x64.so/libllama.so    deps/avx/libllama.so
          cp artifacts/llama-bin-linux-avx2-x64.so/libllama.so   deps/avx2/libllama.so
          cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so

          cp artifacts/llama-bin-win-noavx-x64.dll/llama.dll  deps/llama.dll
          cp artifacts/llama-bin-win-avx-x64.dll/llama.dll    deps/avx/llama.dll
          cp artifacts/llama-bin-win-avx2-x64.dll/llama.dll   deps/avx2/llama.dll
          cp artifacts/llama-bin-win-avx512-x64.dll/llama.dll deps/avx512/llama.dll

          cp artifacts/llava-bin-linux-noavx-x64.so/libllava_shared.so  deps/libllava_shared.so
          cp artifacts/llava-bin-linux-avx-x64.so/libllava_shared.so    deps/avx/libllava_shared.so
          cp artifacts/llava-bin-linux-avx2-x64.so/libllava_shared.so   deps/avx2/libllava_shared.so
          cp artifacts/llava-bin-linux-avx512-x64.so/libllava_shared.so deps/avx512/libllava_shared.so
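
          # The Windows llama.dll artifacts are copied a second time below under the
          # libllama.dll name, so the deps layout ends up containing both file names.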
          cp artifacts/llama-bin-win-noavx-x64.dll/llama.dll  deps/libllama.dll
          cp artifacts/llama-bin-win-avx-x64.dll/llama.dll    deps/avx/libllama.dll
          cp artifacts/llama-bin-win-avx2-x64.dll/llama.dll   deps/avx2/libllama.dll
          cp artifacts/llama-bin-win-avx512-x64.dll/llama.dll deps/avx512/libllama.dll

          cp artifacts/llava-bin-win-noavx-x64.dll/llava_shared.dll  deps/llava_shared.dll
          cp artifacts/llava-bin-win-avx-x64.dll/llava_shared.dll    deps/avx/llava_shared.dll
          cp artifacts/llava-bin-win-avx2-x64.dll/llava_shared.dll   deps/avx2/llava_shared.dll
          cp artifacts/llava-bin-win-avx512-x64.dll/llava_shared.dll deps/avx512/llava_shared.dll

          cp artifacts/llama-bin-osx-arm64.dylib/libllama.dylib        deps/osx-arm64/libllama.dylib
          cp artifacts/llava-bin-osx-arm64.dylib/libllava_shared.dylib deps/osx-arm64/libllava_shared.dylib
          cp artifacts/ggml-metal.metal/ggml-metal.metal               deps/osx-arm64/ggml-metal.metal
          cp artifacts/llama-bin-osx-x64.dylib/libllama.dylib          deps/osx-x64/libllama.dylib
          cp artifacts/llava-bin-osx-x64.dylib/libllava_shared.dylib   deps/osx-x64/libllava_shared.dylib

          cp artifacts/llama-bin-win-cublas-cu11.7.1-x64.dll/llama.dll    deps/cu11.7.1/llama.dll
          cp artifacts/llama-bin-linux-cublas-cu11.7.1-x64.so/libllama.so deps/cu11.7.1/libllama.so
          cp artifacts/llama-bin-win-cublas-cu12.1.0-x64.dll/llama.dll    deps/cu12.1.0/llama.dll
          cp artifacts/llama-bin-linux-cublas-cu12.1.0-x64.so/libllama.so deps/cu12.1.0/libllama.so

          cp artifacts/llama-bin-win-clblast-x64.dll/{llama,clblast}.dll deps/clblast/
          cp artifacts/llama-bin-linux-clblast-x64.so/libllama.so        deps/clblast/

      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          path: deps/
          name: deps

      - name: Remove Artifacts
        uses: geekyeggo/delete-artifact@v2
        with:
          name: |
            llama-*
            llava-*
            *.metal