|
|
|
@@ -111,54 +111,6 @@ namespace LLama.Common |
|
|
|
// This constructor (default parameterless constructor) is used by Newtonsoft to deserialize! |
|
|
|
ModelPath = ""; |
|
|
|
} |
|
|
|
|
|
|
|
/// <summary>
/// Creates model parameters from explicitly supplied values.
/// Prefer the object initializer syntax; this overload is kept only for backward compatibility.
/// </summary>
/// <param name="modelPath">The model path.</param>
/// <param name="contextSize">Model context size (n_ctx)</param>
/// <param name="gpuLayerCount">Number of layers to run in VRAM / GPU memory (n_gpu_layers)</param>
/// <param name="seed">Seed for the random number generator (seed)</param>
/// <param name="useFp16Memory">Whether to use f16 instead of f32 for memory kv (memory_f16)</param>
/// <param name="useMemorymap">Whether to use mmap for faster loads (use_mmap)</param>
/// <param name="useMemoryLock">Whether to use mlock to keep model in memory (use_mlock)</param>
/// <param name="perplexity">Whether to compute perplexity over the prompt (perplexity)</param>
/// <param name="loraAdapter">Lora adapter path (lora_adapter). A null or empty value means no adapter is registered.</param>
/// <param name="loraBase">Base model path for the lora adapter (lora_base)</param>
/// <param name="threads">Number of threads (-1 = autodetect) (n_threads). Any value below 1 is stored as null.</param>
/// <param name="batchSize">Batch size for prompt processing (must be >=32 to use BLAS) (n_batch)</param>
/// <param name="embeddingMode">Whether to use embedding mode. (embedding) Note that if this is set to true, The LLamaModel won't produce text response anymore.</param>
/// <param name="ropeFrequencyBase">RoPE base frequency.</param>
/// <param name="ropeFrequencyScale">RoPE frequency scaling factor</param>
/// <param name="mulMatQ">Use experimental mul_mat_q kernels</param>
/// <param name="encoding">The encoding to use to convert text for the model</param>
[Obsolete("Use object initializer to set all optional parameters")]
public ModelParams(string modelPath, uint contextSize = 512, int gpuLayerCount = 20,
                   uint seed = 1337, bool useFp16Memory = true,
                   bool useMemorymap = true, bool useMemoryLock = false, bool perplexity = false,
                   string loraAdapter = "", string loraBase = "", int threads = -1, uint batchSize = 512,
                   bool embeddingMode = false,
                   float? ropeFrequencyBase = null, float? ropeFrequencyScale = null, bool mulMatQ = false,
                   string encoding = "UTF-8")
{
    ContextSize = contextSize;
    GpuLayerCount = gpuLayerCount;
    Seed = seed;
    UseFp16Memory = useFp16Memory;
    UseMemorymap = useMemorymap;
    UseMemoryLock = useMemoryLock;
    Perplexity = perplexity;
    ModelPath = modelPath;
    LoraBase = loraBase;

    // Non-positive thread counts (including the -1 default) are stored as null.
    Threads = threads < 1 ? null : (uint)threads;

    BatchSize = batchSize;
    EmbeddingMode = embeddingMode;
    RopeFrequencyBase = ropeFrequencyBase;
    RopeFrequencyScale = ropeFrequencyScale;
    MulMatQ = mulMatQ;

    // Resolves the encoding by name; Encoding.GetEncoding throws for names it does not recognise.
    Encoding = Encoding.GetEncoding(encoding);

    // Only register an adapter when a path was actually supplied. The default empty
    // string previously produced a LoraAdapter entry with an empty path.
    // The second argument (1) is presumably the adapter scale - confirm against LoraAdapter.
    if (!string.IsNullOrEmpty(loraAdapter))
    {
        LoraAdapters.Add(new LoraAdapter(loraAdapter, 1));
    }
}
|
|
|
} |
|
|
|
|
|
|
|
internal class EncodingConverter |
|
|
|
|