diff --git a/LLama/Native/LLamaContextParams.cs b/LLama/Native/LLamaContextParams.cs index 6412409e..42f2be3f 100644 --- a/LLama/Native/LLamaContextParams.cs +++ b/LLama/Native/LLamaContextParams.cs @@ -32,7 +32,7 @@ namespace LLama.Native /// /// rms norm epsilon (TEMP - will be moved to model hparams) /// - float rms_norm_eps; + public float rms_norm_eps; /// /// number of layers to store in VRAM @@ -47,19 +47,19 @@ namespace LLama.Native /// /// how to split layers across multiple GPUs /// - public TensorSplits tensor_split; + public float[] tensor_split; /// /// ref: https://github.com/ggerganov/llama.cpp/pull/2054 /// RoPE base frequency /// - float rope_freq_base; + public float rope_freq_base; /// /// ref: https://github.com/ggerganov/llama.cpp/pull/2054 /// RoPE frequency scaling factor /// - float rope_freq_scale; + public float rope_freq_scale; /// /// called with a progress value between 0 and 1, pass NULL to disable diff --git a/LLama/Utils.cs b/LLama/Utils.cs index c2dbf7aa..c08912cf 100644 --- a/LLama/Utils.cs +++ b/LLama/Utils.cs @@ -28,16 +28,13 @@ namespace LLama lparams.logits_all = @params.Perplexity; lparams.embedding = @params.EmbeddingMode; lparams.low_vram = @params.LowVram; - - if(@params.TensorSplits.Length != 1) + + if (@params.TensorSplits.Length != 1) { throw new ArgumentException("Currently multi-gpu support is not supported by " + "both llama.cpp and LLamaSharp."); } - lparams.tensor_split = new TensorSplits() - { - Item1 = @params.TensorSplits[0] - }; + lparams.tensor_split = @params.TensorSplits; if (!File.Exists(@params.ModelPath)) {