diff --git a/LLama/Native/LLamaContextParams.cs b/LLama/Native/LLamaContextParams.cs
index 6412409e..42f2be3f 100644
--- a/LLama/Native/LLamaContextParams.cs
+++ b/LLama/Native/LLamaContextParams.cs
@@ -32,7 +32,7 @@ namespace LLama.Native
///
/// rms norm epsilon (TEMP - will be moved to model hparams)
///
- float rms_norm_eps;
+ public float rms_norm_eps;
///
/// number of layers to store in VRAM
@@ -47,19 +47,19 @@ namespace LLama.Native
///
/// how to split layers across multiple GPUs
///
- public TensorSplits tensor_split;
+ public float[] tensor_split;
///
/// ref: https://github.com/ggerganov/llama.cpp/pull/2054
/// RoPE base frequency
///
- float rope_freq_base;
+ public float rope_freq_base;
///
/// ref: https://github.com/ggerganov/llama.cpp/pull/2054
/// RoPE frequency scaling factor
///
- float rope_freq_scale;
+ public float rope_freq_scale;
///
/// called with a progress value between 0 and 1, pass NULL to disable
diff --git a/LLama/Utils.cs b/LLama/Utils.cs
index c2dbf7aa..c08912cf 100644
--- a/LLama/Utils.cs
+++ b/LLama/Utils.cs
@@ -28,16 +28,13 @@ namespace LLama
lparams.logits_all = @params.Perplexity;
lparams.embedding = @params.EmbeddingMode;
lparams.low_vram = @params.LowVram;
-
- if(@params.TensorSplits.Length != 1)
+
+ if (@params.TensorSplits.Length != 1)
{
throw new ArgumentException("Currently multi-gpu support is not supported by " +
"both llama.cpp and LLamaSharp.");
}
- lparams.tensor_split = new TensorSplits()
- {
- Item1 = @params.TensorSplits[0]
- };
+ lparams.tensor_split = @params.TensorSplits;
if (!File.Exists(@params.ModelPath))
{