diff --git a/LLama/Common/ModelParams.cs b/LLama/Common/ModelParams.cs
index 5cb81078..2230c70c 100644
--- a/LLama/Common/ModelParams.cs
+++ b/LLama/Common/ModelParams.cs
@@ -128,17 +128,17 @@ namespace LLama.Common
/// Batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
/// Whether to convert eos to newline during the inference.
/// Whether to use embedding mode. (embedding) Note that if this is set to true, The LLamaModel won't produce text response anymore.
- /// Grouped-Query Attention
- /// RMS Norm Epsilon
- /// RoPE base frequency.
- /// RoPE frequency scaling factor
- /// Use experimental mul_mat_q kernels
+ /// Grouped-Query Attention factor (n_gqa)
+ /// RMS Norm Epsilon (rms_norm_eps)
+ /// RoPE base frequency (rope_freq_base)
+ /// RoPE frequency scaling factor (rope_freq_scale)
+ /// Use experimental mul_mat_q kernels (mul_mat_q)
public ModelParams(string modelPath, int contextSize = 512, int gpuLayerCount = 20,
int seed = 1337, bool useFp16Memory = true,
bool useMemorymap = true, bool useMemoryLock = false, bool perplexity = false,
string loraAdapter = "", string loraBase = "", int threads = -1, int batchSize = 512,
bool convertEosToNewLine = false, bool embeddingMode = false,
- int gqa = 1, float rmsNormEps = 5e-6f, float rope_freq_base = 10000.0f, float rope_freq_scale = 1f, bool muMatQ = false)
+ int groupedQueryAttention = 1, float rmsNormEpsilon = 5e-6f, float ropeFrequencyBase = 10000.0f, float ropeFrequencyScale = 1f, bool mulMatQ = false)
{
ContextSize = contextSize;
GpuLayerCount = gpuLayerCount;
@@ -154,11 +154,11 @@ namespace LLama.Common
BatchSize = batchSize;
ConvertEosToNewLine = convertEosToNewLine;
EmbeddingMode = embeddingMode;
- GroupedQueryAttention = gqa;
- RmsNormEpsilon = rmsNormEps;
- RopeFrequencyBase = rope_freq_base;
- RopeFrequencyScale = rope_freq_scale;
- MulMatQ = muMatQ;
+ GroupedQueryAttention = groupedQueryAttention;
+ RmsNormEpsilon = rmsNormEpsilon;
+ RopeFrequencyBase = ropeFrequencyBase;
+ RopeFrequencyScale = ropeFrequencyScale;
+ MulMatQ = mulMatQ;
}
}
}