diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 00000000..b00368fb
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,58 @@
+name: CI
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+
+jobs:
+  build:
+    name: Test
+    runs-on: ${{ matrix.os }}
+    strategy:
+      # Let every OS/configuration combination run to completion even if one fails.
+      fail-fast: false
+      matrix:
+        build: [linux-debug, linux-release, macos-debug, macos-release, windows-debug, windows-release]
+        # Attach the runner image and build configuration to each matrix entry.
+        include:
+          - build: linux-debug
+            os: ubuntu-latest
+            config: debug
+          - build: linux-release
+            os: ubuntu-latest
+            config: release
+          - build: macos-debug
+            os: macos-latest
+            config: debug
+          - build: macos-release
+            os: macos-latest
+            config: release
+          - build: windows-debug
+            os: windows-2019
+            config: debug
+          - build: windows-release
+            os: windows-2019
+            config: release
+    steps:
+    - uses: actions/checkout@v2
+    - uses: actions/setup-dotnet@v1
+      with:
+        dotnet-version: |
+          6.0.x
+          7.0.x
+    # Cache LLama.Unittest/Models between runs; the test project downloads its model file there.
+    - name: Cache unit test models
+      uses: actions/cache@v3
+      with:
+        key: "unit_test_models"
+        path: LLama.Unittest/Models
+    # Workaround for actions/setup-dotnet#155
+    - name: Clear package cache
+      run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear
+    - name: Restore packages
+      run: dotnet restore LLamaSharp.sln
+    - name: Build
+      run: dotnet build LLamaSharp.sln -c ${{ matrix.config }} --no-restore
+    - name: Test
+      run: dotnet test LLamaSharp.sln -c ${{ matrix.config }}
diff --git a/.gitignore b/.gitignore
index d1d0ba40..e7c87968 100644
--- a/.gitignore
+++ b/.gitignore
@@ -341,4 +341,7 @@ test/TensorFlowNET.Examples/mnist
 *.xsd
 
 # docs
-site/
\ No newline at end of file
+site/
+
+/LLama.Unittest/Models/*.bin
+
diff --git a/LLama.Unittest/BasicTest.cs b/LLama.Unittest/BasicTest.cs
index 29178432..308b13ad 100644
--- a/LLama.Unittest/BasicTest.cs
+++ b/LLama.Unittest/BasicTest.cs
@@ -1,11 +1,15 @@
+using LLama;
+using LLama.Common;
+
 namespace LLama.Unittest
 {
     public class BasicTest
     {
         [Fact]
-        public void SimpleQA()
+        public void LoadModel()
         {
-            
+            // Smoke test: constructing the model must not throw; the using declaration disposes it when the method exits.
+            using var model = new LLamaModel(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256));
         }
     }
 }
\ No newline at end of file
diff --git a/LLama.Unittest/LLama.Unittest.csproj b/LLama.Unittest/LLama.Unittest.csproj
index 93922e81..81e71a88 100644
--- a/LLama.Unittest/LLama.Unittest.csproj
+++ b/LLama.Unittest/LLama.Unittest.csproj
@@ -23,8 +23,23 @@
     </PackageReference>
   </ItemGroup>
 
+  <!-- Download the model file used by the unit tests into Models\ before the build runs. -->
+  <Target Name="DownloadContentFiles" BeforeTargets="Build">
+    <DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q3_K_S.bin" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.ggmlv3.q3_K_S.bin" SkipUnchangedFiles="true" />
+  </Target>
+
   <ItemGroup>
     <ProjectReference Include="..\LLama\LLamaSharp.csproj" />
   </ItemGroup>
 
+  <ItemGroup>
+    <Folder Include="Models\" />
+  </ItemGroup>
+
+  <!-- Copy the model to the output directory; the test loads it through the relative path Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin. -->
+  <ItemGroup>
+    <None Update="Models\llama-2-7b-chat.ggmlv3.q3_K_S.bin">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
 </Project>
diff --git a/LLama/Common/FixedSizeQueue.cs b/LLama/Common/FixedSizeQueue.cs
index 84bc992c..68d64a88 100644
--- a/LLama/Common/FixedSizeQueue.cs
+++ b/LLama/Common/FixedSizeQueue.cs
@@ -52,7 +52,7 @@ namespace LLama.Common
                 throw new ArgumentException($"The max size set for the quene is {size}, but got {count} initial values.");
 #endif
         }
-
+
         /// <summary>
         /// Replace every item in the queue with the given value
         /// </summary>
diff --git a/LLama/Common/ModelParams.cs b/LLama/Common/ModelParams.cs
index 2a591bcd..4f72eff3 100644
--- a/LLama/Common/ModelParams.cs
+++ b/LLama/Common/ModelParams.cs
@@ -84,7 +84,7 @@ namespace LLama.Common
         /// <summary>
         /// how split tensors should be distributed across GPUs
         /// </summary>
-        public float[] TensorSplits { get; set; } = new float[] { 0 };
+        public nint TensorSplits { get; set; }
 
         /// <summary>
         /// 
diff --git a/LLama/Native/LLamaContextParams.cs b/LLama/Native/LLamaContextParams.cs
index 42f2be3f..0ede4e76 100644
--- a/LLama/Native/LLamaContextParams.cs
+++ b/LLama/Native/LLamaContextParams.cs
@@ -47,7 +47,8 @@ namespace LLama.Native
         /// <summary>
         /// how to split layers across multiple GPUs
         /// </summary>
-        public float[] tensor_split;
+        public nint tensor_split;
+
 
         /// <summary>
         /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
@@ -78,6 +79,11 @@ namespace LLama.Native
         [MarshalAs(UnmanagedType.I1)]
         public bool low_vram;
 
+        /// <summary>
+        /// if true, use experimental mul_mat_q kernels
+        /// </summary>
+        [MarshalAs(UnmanagedType.I1)] public bool mul_mat_q;
+
         /// <summary>
         /// use fp16 for KV cache
         /// </summary>
@@ -114,9 +120,5 @@ namespace LLama.Native
         [MarshalAs(UnmanagedType.I1)] 
         public bool embedding;
     }
-
-    public struct TensorSplits
-    {
-        public float Item1;
-    }
 }
+
diff --git a/LLama/Utils.cs b/LLama/Utils.cs
index c08912cf..e99e6b29 100644
--- a/LLama/Utils.cs
+++ b/LLama/Utils.cs
@@ -28,12 +28,14 @@ namespace LLama
             lparams.logits_all = @params.Perplexity;
             lparams.embedding = @params.EmbeddingMode;
             lparams.low_vram = @params.LowVram;
-       
+
+            /*
             if (@params.TensorSplits.Length != 1)
             {
                 throw new ArgumentException("Currently multi-gpu support is not supported by " +
                     "both llama.cpp and LLamaSharp.");
-            }
+            }*/
+
             lparams.tensor_split = @params.TensorSplits;
 
             if (!File.Exists(@params.ModelPath))
diff --git a/LLama/runtimes/libllama.dylib b/LLama/runtimes/libllama.dylib
deleted file mode 100755
index 7cd1f4ab..00000000
Binary files a/LLama/runtimes/libllama.dylib and /dev/null differ