diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000..b00368fb --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,55 @@ +name: CI +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + build: + name: Test + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + build: [linux-debug, linux-release, macos-debug, macos-release, windows-debug, windows-release] + include: + - build: linux-debug + os: ubuntu-latest + config: debug + - build: linux-release + os: ubuntu-latest + config: release + - build: macos-debug + os: macos-latest + config: debug + - build: macos-release + os: macos-latest + config: release + - build: windows-debug + os: windows-2019 + config: debug + - build: windows-release + os: windows-2019 + config: release + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-dotnet@v1 + with: + dotnet-version: | + 6.0.x + 7.0.x + - name: Cache Gradle packages + uses: actions/cache@v3 + with: + key: "unit_test_models" + path: LLama.Unittest/Models + # workaround for actions/setup-dotnet#155 + - name: Clear package cache + run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear + - name: Restore packages + run: dotnet restore LLamaSharp.sln + - name: Build + run: dotnet build LLamaSharp.sln -c ${{ matrix.config }} --no-restore + - name: Test + run: dotnet test LLamaSharp.sln -c ${{ matrix.config }} diff --git a/.gitignore b/.gitignore index d1d0ba40..e7c87968 100644 --- a/.gitignore +++ b/.gitignore @@ -341,4 +341,7 @@ test/TensorFlowNET.Examples/mnist *.xsd # docs -site/ \ No newline at end of file +site/ + +/LLama.Unittest/Models/*.bin + diff --git a/LLama.Unittest/BasicTest.cs b/LLama.Unittest/BasicTest.cs index 29178432..308b13ad 100644 --- a/LLama.Unittest/BasicTest.cs +++ b/LLama.Unittest/BasicTest.cs @@ -1,11 +1,15 @@ +using LLama; +using LLama.Common; + namespace LLama.Unittest { public class BasicTest { [Fact] - public void SimpleQA() + public void LoadModel() { - + var model = new LLamaModel(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256)); + model.Dispose(); } } } \ No newline at end of file diff --git a/LLama.Unittest/LLama.Unittest.csproj b/LLama.Unittest/LLama.Unittest.csproj index 93922e81..81e71a88 100644 --- a/LLama.Unittest/LLama.Unittest.csproj +++ b/LLama.Unittest/LLama.Unittest.csproj @@ -23,8 +23,22 @@ + + + + + + + + + + + + PreserveNewest + + diff --git a/LLama/Common/FixedSizeQueue.cs b/LLama/Common/FixedSizeQueue.cs index 84bc992c..68d64a88 100644 --- a/LLama/Common/FixedSizeQueue.cs +++ b/LLama/Common/FixedSizeQueue.cs @@ -30,6 +30,7 @@ namespace LLama.Common /// public FixedSizeQueue(int size, IEnumerable data) { +#if NETCOREAPP3_0_OR_GREATER // Try an early check on the amount of data supplied (if possible) #if NETSTANDARD2_0 var dataCount = data.Count(); @@ -52,7 +53,7 @@ namespace LLama.Common throw new ArgumentException($"The max size set for the quene is {size}, but got {count} initial values."); #endif } - +/ /// /// Replace every item in the queue with the given value /// diff --git a/LLama/Common/ModelParams.cs b/LLama/Common/ModelParams.cs index 2a591bcd..4f72eff3 100644 --- a/LLama/Common/ModelParams.cs +++ b/LLama/Common/ModelParams.cs @@ -84,7 +84,7 @@ namespace LLama.Common /// /// how split tensors should be distributed across GPUs /// - public float[] TensorSplits { get; set; } = new float[] { 0 }; + public nint TensorSplits { get; set; } /// /// diff --git a/LLama/Native/LLamaContextParams.cs b/LLama/Native/LLamaContextParams.cs index 42f2be3f..0ede4e76 100644 --- a/LLama/Native/LLamaContextParams.cs +++ b/LLama/Native/LLamaContextParams.cs @@ -47,7 +47,8 @@ namespace LLama.Native /// /// how to split layers across multiple GPUs /// - public float[] tensor_split; + public nint tensor_split; + /// /// ref: https://github.com/ggerganov/llama.cpp/pull/2054 @@ -78,6 +79,11 @@ namespace LLama.Native [MarshalAs(UnmanagedType.I1)] public bool low_vram; + /// + /// if true, use experimental mul_mat_q kernels + /// + [MarshalAs(UnmanagedType.I1)] public bool mul_mat_q; + /// /// use fp16 for KV cache /// @@ -114,9 +120,5 @@ namespace LLama.Native [MarshalAs(UnmanagedType.I1)] public bool embedding; } - - public struct TensorSplits - { - public float Item1; - } } + diff --git a/LLama/Utils.cs b/LLama/Utils.cs index c08912cf..e99e6b29 100644 --- a/LLama/Utils.cs +++ b/LLama/Utils.cs @@ -28,12 +28,14 @@ namespace LLama lparams.logits_all = @params.Perplexity; lparams.embedding = @params.EmbeddingMode; lparams.low_vram = @params.LowVram; - + + /* if (@params.TensorSplits.Length != 1) { throw new ArgumentException("Currently multi-gpu support is not supported by " + "both llama.cpp and LLamaSharp."); - } + }*/ + lparams.tensor_split = @params.TensorSplits; if (!File.Exists(@params.ModelPath)) diff --git a/LLama/runtimes/libllama.dylib b/LLama/runtimes/libllama.dylib deleted file mode 100755 index 7cd1f4ab..00000000 Binary files a/LLama/runtimes/libllama.dylib and /dev/null differ