diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 00000000..b00368fb
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,63 @@
+# Continuous integration: restore, build and test LLamaSharp.sln on Linux,
+# macOS and Windows, in both debug and release configurations.
+name: CI
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+
+jobs:
+  build:
+    name: Test
+    runs-on: ${{ matrix.os }}
+    strategy:
+      # Let every OS/configuration combination finish even if one of them fails.
+      fail-fast: false
+      matrix:
+        build: [linux-debug, linux-release, macos-debug, macos-release, windows-debug, windows-release]
+        # Each `include` entry attaches the runner image (`os`) and the build
+        # configuration (`config`) to the matching `build` name above.
+        include:
+          - build: linux-debug
+            os: ubuntu-latest
+            config: debug
+          - build: linux-release
+            os: ubuntu-latest
+            config: release
+          - build: macos-debug
+            os: macos-latest
+            config: debug
+          - build: macos-release
+            os: macos-latest
+            config: release
+          - build: windows-debug
+            os: windows-2019
+            config: debug
+          - build: windows-release
+            os: windows-2019
+            config: release
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-dotnet@v3
+        with:
+          dotnet-version: |
+            6.0.x
+            7.0.x
+      # Reuse LLama.Unittest/Models across runs under a fixed cache key.
+      # NOTE(review): no step in this workflow downloads a model into that
+      # directory - confirm how the cache is expected to be populated.
+      - name: Cache unit test models
+        uses: actions/cache@v3
+        with:
+          key: "unit_test_models"
+          path: LLama.Unittest/Models
+      # workaround for actions/setup-dotnet#155
+      - name: Clear package cache
+        run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear
+      - name: Restore packages
+        run: dotnet restore LLamaSharp.sln
+      - name: Build
+        run: dotnet build LLamaSharp.sln -c ${{ matrix.config }} --no-restore
+      - name: Test
+        run: dotnet test LLamaSharp.sln -c ${{ matrix.config }}
diff --git a/.gitignore b/.gitignore
index d1d0ba40..e7c87968 100644
--- a/.gitignore
+++ b/.gitignore
@@ -341,4 +341,7 @@ test/TensorFlowNET.Examples/mnist
*.xsd
# docs
-site/
\ No newline at end of file
+site/
+
+/LLama.Unittest/Models/*.bin
+
diff --git a/LLama.Unittest/BasicTest.cs b/LLama.Unittest/BasicTest.cs
index 29178432..308b13ad 100644
--- a/LLama.Unittest/BasicTest.cs
+++ b/LLama.Unittest/BasicTest.cs
@@ -1,11 +1,15 @@
+using LLama;
+using LLama.Common;
+
namespace LLama.Unittest
{
public class BasicTest
{
[Fact]
+ public void LoadModel() // smoke test: contains no assertions, so it passes when construction and disposal do not throw
{
-
+ var model = new LLamaModel(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256)); // relative path; NOTE(review): presumably resolved against the test output directory - confirm the model file is copied there
+ model.Dispose(); // release the model explicitly once it has been constructed
}
}
}
\ No newline at end of file
diff --git a/LLama.Unittest/LLama.Unittest.csproj b/LLama.Unittest/LLama.Unittest.csproj
index 93922e81..81e71a88 100644
--- a/LLama.Unittest/LLama.Unittest.csproj
+++ b/LLama.Unittest/LLama.Unittest.csproj
@@ -23,8 +23,22 @@
+
+
+
+
+
+
+
+
+
+
+
+ PreserveNewest
+
+
diff --git a/LLama/Common/FixedSizeQueue.cs b/LLama/Common/FixedSizeQueue.cs
index 84bc992c..68d64a88 100644
--- a/LLama/Common/FixedSizeQueue.cs
+++ b/LLama/Common/FixedSizeQueue.cs
@@ -30,6 +30,7 @@ namespace LLama.Common
///
public FixedSizeQueue(int size, IEnumerable data)
{
+#if NETCOREAPP3_0_OR_GREATER
// Try an early check on the amount of data supplied (if possible)
#if NETSTANDARD2_0
var dataCount = data.Count();
@@ -52,7 +53,7 @@ namespace LLama.Common
throw new ArgumentException($"The max size set for the quene is {size}, but got {count} initial values.");
#endif
}
-
+/
///
/// Replace every item in the queue with the given value
///
diff --git a/LLama/Common/ModelParams.cs b/LLama/Common/ModelParams.cs
index 2a591bcd..4f72eff3 100644
--- a/LLama/Common/ModelParams.cs
+++ b/LLama/Common/ModelParams.cs
@@ -84,7 +84,7 @@ namespace LLama.Common
///
/// how split tensors should be distributed across GPUs
///
- public float[] TensorSplits { get; set; } = new float[] { 0 };
+ public nint TensorSplits { get; set; }
///
///
diff --git a/LLama/Native/LLamaContextParams.cs b/LLama/Native/LLamaContextParams.cs
index 42f2be3f..0ede4e76 100644
--- a/LLama/Native/LLamaContextParams.cs
+++ b/LLama/Native/LLamaContextParams.cs
@@ -47,7 +47,8 @@ namespace LLama.Native
///
/// how to split layers across multiple GPUs
///
- public float[] tensor_split;
+ public nint tensor_split;
+
///
/// ref: https://github.com/ggerganov/llama.cpp/pull/2054
@@ -78,6 +79,11 @@ namespace LLama.Native
[MarshalAs(UnmanagedType.I1)]
public bool low_vram;
+ ///
+ /// if true, use experimental mul_mat_q kernels
+ ///
+ [MarshalAs(UnmanagedType.I1)] public bool mul_mat_q;
+
///
/// use fp16 for KV cache
///
@@ -114,9 +120,5 @@ namespace LLama.Native
[MarshalAs(UnmanagedType.I1)]
public bool embedding;
}
-
- public struct TensorSplits
- {
- public float Item1;
- }
}
+
diff --git a/LLama/Utils.cs b/LLama/Utils.cs
index c08912cf..e99e6b29 100644
--- a/LLama/Utils.cs
+++ b/LLama/Utils.cs
@@ -28,12 +28,14 @@ namespace LLama
lparams.logits_all = @params.Perplexity;
lparams.embedding = @params.EmbeddingMode;
lparams.low_vram = @params.LowVram;
-
+
+ /*
if (@params.TensorSplits.Length != 1)
{
throw new ArgumentException("Currently multi-gpu support is not supported by " +
"both llama.cpp and LLamaSharp.");
- }
+ }*/
+
lparams.tensor_split = @params.TensorSplits;
if (!File.Exists(@params.ModelPath))
diff --git a/LLama/runtimes/libllama.dylib b/LLama/runtimes/libllama.dylib
deleted file mode 100755
index 7cd1f4ab..00000000
Binary files a/LLama/runtimes/libllama.dylib and /dev/null differ