Browse Source

LLamaContextParams epsilon and tensor split changes

tags/v0.4.2-preview
sa_ddam213 2 years ago
parent
commit
3e252c81f6
2 changed files with 7 additions and 10 deletions
  1. +4
    -4
      LLama/Native/LLamaContextParams.cs
  2. +3
    -6
      LLama/Utils.cs

+ 4
- 4
LLama/Native/LLamaContextParams.cs View File

@@ -32,7 +32,7 @@ namespace LLama.Native
/// <summary>
/// rms norm epsilon (TEMP - will be moved to model hparams)
/// </summary>
float rms_norm_eps;
public float rms_norm_eps;

/// <summary>
/// number of layers to store in VRAM
@@ -47,19 +47,19 @@ namespace LLama.Native
/// <summary>
/// how to split layers across multiple GPUs
/// </summary>
public TensorSplits tensor_split;
public float[] tensor_split;

/// <summary>
/// ref: https://github.com/ggerganov/llama.cpp/pull/2054
/// RoPE base frequency
/// </summary>
float rope_freq_base;
public float rope_freq_base;

/// <summary>
/// ref: https://github.com/ggerganov/llama.cpp/pull/2054
/// RoPE frequency scaling factor
/// </summary>
float rope_freq_scale;
public float rope_freq_scale;

/// <summary>
/// called with a progress value between 0 and 1, pass NULL to disable


+ 3
- 6
LLama/Utils.cs View File

@@ -28,16 +28,13 @@ namespace LLama
lparams.logits_all = @params.Perplexity;
lparams.embedding = @params.EmbeddingMode;
lparams.low_vram = @params.LowVram;
if(@params.TensorSplits.Length != 1)
if (@params.TensorSplits.Length != 1)
{
throw new ArgumentException("Currently multi-gpu support is not supported by " +
"both llama.cpp and LLamaSharp.");
}
lparams.tensor_split = new TensorSplits()
{
Item1 = @params.TensorSplits[0]
};
lparams.tensor_split = @params.TensorSplits;

if (!File.Exists(@params.ModelPath))
{


Loading…
Cancel
Save