diff --git a/LLama.Web/Common/ParameterOptions.cs b/LLama.Web/Common/ParameterOptions.cs
index 3cdd3701..7677f04a 100644
--- a/LLama.Web/Common/ParameterOptions.cs
+++ b/LLama.Web/Common/ParameterOptions.cs
@@ -1,9 +1,99 @@
 ﻿using LLama.Common;
+using LLama.Abstractions;
 
 namespace LLama.Web.Common
 {
-    public class ParameterOptions : InferenceParams
-    {
+    public class ParameterOptions : IInferenceParams
+	{
         public string Name { get; set; }
-    }
+
+
+
+		/// <summary>
+		/// number of tokens to keep from initial prompt
+		/// </summary>
+		public int TokensKeep { get; set; } = 0;
+		/// <summary>
+		/// how many new tokens to predict (n_predict), set to -1 to inifinitely generate response
+		/// until it complete.
+		/// </summary>
+		public int MaxTokens { get; set; } = -1;
+		/// <summary>
+		/// logit bias for specific tokens
+		/// </summary>
+		public Dictionary<int, float>? LogitBias { get; set; } = null;
+
+		/// <summary>
+		/// Sequences where the model will stop generating further tokens.
+		/// </summary>
+		public IEnumerable<string> AntiPrompts { get; set; } = Array.Empty<string>();
+		/// <summary>
+		/// path to file for saving/loading model eval state
+		/// </summary>
+		public string PathSession { get; set; } = string.Empty;
+		/// <summary>
+		/// string to suffix user inputs with
+		/// </summary>
+		public string InputSuffix { get; set; } = string.Empty;
+		/// <summary>
+		/// string to prefix user inputs with
+		/// </summary>
+		public string InputPrefix { get; set; } = string.Empty;
+		/// <summary>
+		///  0 or lower to use vocab size
+		/// </summary>
+		public int TopK { get; set; } = 40;
+		/// <summary>
+		/// 1.0 = disabled
+		/// </summary>
+		public float TopP { get; set; } = 0.95f;
+		/// <summary>
+		/// 1.0 = disabled
+		/// </summary>
+		public float TfsZ { get; set; } = 1.0f;
+		/// <summary>
+		/// 1.0 = disabled
+		/// </summary>
+		public float TypicalP { get; set; } = 1.0f;
+		/// <summary>
+		/// 1.0 = disabled
+		/// </summary>
+		public float Temperature { get; set; } = 0.8f;
+		/// <summary>
+		/// 1.0 = disabled
+		/// </summary>
+		public float RepeatPenalty { get; set; } = 1.1f;
+		/// <summary>
+		/// last n tokens to penalize (0 = disable penalty, -1 = context size) (repeat_last_n)
+		/// </summary>
+		public int RepeatLastTokensCount { get; set; } = 64;
+		/// <summary>
+		/// frequency penalty coefficient
+		/// 0.0 = disabled
+		/// </summary>
+		public float FrequencyPenalty { get; set; } = .0f;
+		/// <summary>
+		/// presence penalty coefficient
+		/// 0.0 = disabled
+		/// </summary>
+		public float PresencePenalty { get; set; } = .0f;
+		/// <summary>
+		/// Mirostat uses tokens instead of words.
+		/// algorithm described in the paper https://arxiv.org/abs/2007.14966.
+		/// 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
+		/// </summary>
+		public MirostatType Mirostat { get; set; } = MirostatType.Disable;
+		/// <summary>
+		/// target entropy
+		/// </summary>
+		public float MirostatTau { get; set; } = 5.0f;
+		/// <summary>
+		/// learning rate
+		/// </summary>
+		public float MirostatEta { get; set; } = 0.1f;
+		/// <summary>
+		/// consider newlines as a repeatable token (penalize_nl)
+		/// </summary>
+		public bool PenalizeNL { get; set; } = true;
+	}
 }
diff --git a/LLama/Abstractions/IInferenceParams.cs b/LLama/Abstractions/IInferenceParams.cs
new file mode 100644
index 00000000..73cbbfd2
--- /dev/null
+++ b/LLama/Abstractions/IInferenceParams.cs
@@ -0,0 +1,117 @@
+﻿using System.Collections.Generic;
+using LLama.Common;
+
+namespace LLama.Abstractions
+{  
+	 /// <summary>
+	 /// The paramters used for inference.
+	 /// </summary>
+	public interface IInferenceParams
+    {
+		/// <summary>
+		/// number of tokens to keep from initial prompt
+		/// </summary>
+		public int TokensKeep { get; set; }
+
+		/// <summary>
+		/// how many new tokens to predict (n_predict), set to -1 to inifinitely generate response
+		/// until it complete.
+		/// </summary>
+		public int MaxTokens { get; set; }
+
+		/// <summary>
+		/// logit bias for specific tokens
+		/// </summary>
+		public Dictionary<int, float>? LogitBias { get; set; }
+
+
+		/// <summary>
+		/// Sequences where the model will stop generating further tokens.
+		/// </summary>
+		public IEnumerable<string> AntiPrompts { get; set; }
+
+		/// <summary>
+		/// path to file for saving/loading model eval state
+		/// </summary>
+		public string PathSession { get; set; }
+
+		/// <summary>
+		/// string to suffix user inputs with
+		/// </summary>
+		public string InputSuffix { get; set; }
+
+		/// <summary>
+		/// string to prefix user inputs with
+		/// </summary>
+		public string InputPrefix { get; set; }
+
+		/// <summary>
+		///  0 or lower to use vocab size
+		/// </summary>
+		public int TopK { get; set; }
+
+		/// <summary>
+		/// 1.0 = disabled
+		/// </summary>
+		public float TopP { get; set; }
+
+		/// <summary>
+		/// 1.0 = disabled
+		/// </summary>
+		public float TfsZ { get; set; }
+
+		/// <summary>
+		/// 1.0 = disabled
+		/// </summary>
+		public float TypicalP { get; set; }
+
+		/// <summary>
+		/// 1.0 = disabled
+		/// </summary>
+		public float Temperature { get; set; }
+
+		/// <summary>
+		/// 1.0 = disabled
+		/// </summary>
+		public float RepeatPenalty { get; set; }
+
+		/// <summary>
+		/// last n tokens to penalize (0 = disable penalty, -1 = context size) (repeat_last_n)
+		/// </summary>
+		public int RepeatLastTokensCount { get; set; }
+
+		/// <summary>
+		/// frequency penalty coefficient
+		/// 0.0 = disabled
+		/// </summary>
+		public float FrequencyPenalty { get; set; }
+
+		/// <summary>
+		/// presence penalty coefficient
+		/// 0.0 = disabled
+		/// </summary>
+		public float PresencePenalty { get; set; }
+
+		/// <summary>
+		/// Mirostat uses tokens instead of words.
+		/// algorithm described in the paper https://arxiv.org/abs/2007.14966.
+		/// 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
+		/// </summary>
+		public MirostatType Mirostat { get; set; }
+
+		/// <summary>
+		/// target entropy
+		/// </summary>
+		public float MirostatTau { get; set; }
+
+		/// <summary>
+		/// learning rate
+		/// </summary>
+		public float MirostatEta { get; set; }
+
+		/// <summary>
+		/// consider newlines as a repeatable token (penalize_nl)
+		/// </summary>
+		public bool PenalizeNL { get; set; }
+	}
+}
\ No newline at end of file
diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs
index d35e075e..6a750895 100644
--- a/LLama/Abstractions/ILLamaExecutor.cs
+++ b/LLama/Abstractions/ILLamaExecutor.cs
@@ -23,7 +23,7 @@ namespace LLama.Abstractions
         /// <param name="inferenceParams">Any additional parameters</param>
         /// <param name="token">A cancellation token.</param>
         /// <returns></returns>
-        IEnumerable<string> Infer(string text, InferenceParams? inferenceParams = null, CancellationToken token = default);
+        IEnumerable<string> Infer(string text, IInferenceParams? inferenceParams = null, CancellationToken token = default);
 
         /// <summary>
         /// Asynchronously infers a response from the model.
@@ -32,6 +32,6 @@ namespace LLama.Abstractions
         /// <param name="inferenceParams">Any additional parameters</param>
         /// <param name="token">A cancellation token.</param>
         /// <returns></returns>
-        IAsyncEnumerable<string> InferAsync(string text, InferenceParams? inferenceParams = null, CancellationToken token = default);
+        IAsyncEnumerable<string> InferAsync(string text, IInferenceParams? inferenceParams = null, CancellationToken token = default);
     }
 }
diff --git a/LLama/ChatSession.cs b/LLama/ChatSession.cs
index b87e8984..4a4544b0 100644
--- a/LLama/ChatSession.cs
+++ b/LLama/ChatSession.cs
@@ -138,7 +138,7 @@ namespace LLama
         /// <param name="inferenceParams"></param>
         /// <param name="cancellationToken"></param>
         /// <returns></returns>
-        public IEnumerable<string> Chat(ChatHistory history, InferenceParams? inferenceParams = null, CancellationToken cancellationToken = default)
+        public IEnumerable<string> Chat(ChatHistory history, IInferenceParams? inferenceParams = null, CancellationToken cancellationToken = default)
         {
             var prompt = HistoryTransform.HistoryToText(history);
             History.Messages.AddRange(HistoryTransform.TextToHistory(AuthorRole.User, prompt).Messages);
@@ -159,7 +159,7 @@ namespace LLama
         /// <param name="inferenceParams"></param>
         /// <param name="cancellationToken"></param>
         /// <returns></returns>
-        public IEnumerable<string> Chat(string prompt, InferenceParams? inferenceParams = null, CancellationToken cancellationToken = default)
+        public IEnumerable<string> Chat(string prompt, IInferenceParams? inferenceParams = null, CancellationToken cancellationToken = default)
         {
             foreach(var inputTransform in InputTransformPipeline)
             {
@@ -182,7 +182,7 @@ namespace LLama
         /// <param name="inferenceParams"></param>
         /// <param name="cancellationToken"></param>
         /// <returns></returns>
-        public async IAsyncEnumerable<string> ChatAsync(ChatHistory history, InferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+        public async IAsyncEnumerable<string> ChatAsync(ChatHistory history, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
         {
             var prompt = HistoryTransform.HistoryToText(history);
             History.Messages.AddRange(HistoryTransform.TextToHistory(AuthorRole.User, prompt).Messages);
@@ -202,7 +202,7 @@ namespace LLama
         /// <param name="inferenceParams"></param>
         /// <param name="cancellationToken"></param>
         /// <returns></returns>
-        public async IAsyncEnumerable<string> ChatAsync(string prompt, InferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+        public async IAsyncEnumerable<string> ChatAsync(string prompt, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
         {
             foreach (var inputTransform in InputTransformPipeline)
             {
@@ -218,13 +218,13 @@ namespace LLama
             History.Messages.AddRange(HistoryTransform.TextToHistory(AuthorRole.Assistant, sb.ToString()).Messages);
         }
 
-        private IEnumerable<string> ChatInternal(string prompt, InferenceParams? inferenceParams = null, CancellationToken cancellationToken = default)
+        private IEnumerable<string> ChatInternal(string prompt, IInferenceParams? inferenceParams = null, CancellationToken cancellationToken = default)
         {
             var results = _executor.Infer(prompt, inferenceParams, cancellationToken);
             return OutputTransform.Transform(results);
         }
 
-        private async IAsyncEnumerable<string> ChatAsyncInternal(string prompt, InferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+        private async IAsyncEnumerable<string> ChatAsyncInternal(string prompt, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
         {
             var results = _executor.InferAsync(prompt, inferenceParams, cancellationToken);
             await foreach (var item in OutputTransform.TransformAsync(results))
diff --git a/LLama/Common/InferenceParams.cs b/LLama/Common/InferenceParams.cs
index 77af7eaf..001a8f8e 100644
--- a/LLama/Common/InferenceParams.cs
+++ b/LLama/Common/InferenceParams.cs
@@ -1,4 +1,5 @@
-﻿using System;
+﻿using LLama.Abstractions;
+using System;
 using System.Collections.Generic;
 
 namespace LLama.Common
@@ -7,7 +8,7 @@ namespace LLama.Common
     /// <summary>
     /// The paramters used for inference.
     /// </summary>
-    public class InferenceParams
+    public class InferenceParams : IInferenceParams
     {
         /// <summary>
         /// number of tokens to keep from initial prompt
diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs
index afbc0f25..dbd1b593 100644
--- a/LLama/LLamaExecutorBase.cs
+++ b/LLama/LLamaExecutorBase.cs
@@ -231,13 +231,13 @@ namespace LLama
         /// <param name="args"></param>
         /// <param name="extraOutputs"></param>
         /// <returns></returns>
-        protected abstract bool PostProcess(InferenceParams inferenceParams, InferStateArgs args, out IEnumerable<string>? extraOutputs);
+        protected abstract bool PostProcess(IInferenceParams inferenceParams, InferStateArgs args, out IEnumerable<string>? extraOutputs);
         /// <summary>
         /// The core inference logic.
         /// </summary>
         /// <param name="inferenceParams"></param>
         /// <param name="args"></param>
-        protected abstract void InferInternal(InferenceParams inferenceParams, InferStateArgs args);
+        protected abstract void InferInternal(IInferenceParams inferenceParams, InferStateArgs args);
         /// <summary>
         /// Save the current state to a file.
         /// </summary>
@@ -267,7 +267,7 @@ namespace LLama
         /// <param name="inferenceParams"></param>
         /// <param name="cancellationToken"></param>
         /// <returns></returns>
-        public virtual IEnumerable<string> Infer(string text, InferenceParams? inferenceParams = null, CancellationToken cancellationToken = default)
+        public virtual IEnumerable<string> Infer(string text, IInferenceParams? inferenceParams = null, CancellationToken cancellationToken = default)
         {
             cancellationToken.ThrowIfCancellationRequested();
             if (inferenceParams is null)
@@ -324,7 +324,7 @@ namespace LLama
         /// <param name="inferenceParams"></param>
         /// <param name="cancellationToken"></param>
         /// <returns></returns>
-        public virtual async IAsyncEnumerable<string> InferAsync(string text, InferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+        public virtual async IAsyncEnumerable<string> InferAsync(string text, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
         {
             foreach (var result in Infer(text, inferenceParams, cancellationToken))
             {
diff --git a/LLama/LLamaInstructExecutor.cs b/LLama/LLamaInstructExecutor.cs
index 89fbac59..e055c147 100644
--- a/LLama/LLamaInstructExecutor.cs
+++ b/LLama/LLamaInstructExecutor.cs
@@ -1,4 +1,5 @@
-﻿using LLama.Common;
+﻿using LLama.Abstractions;
+using LLama.Common;
 using LLama.Native;
 using System;
 using System.Collections.Generic;
@@ -136,7 +137,7 @@ namespace LLama
             }
         }
         /// <inheritdoc />
-        protected override bool PostProcess(InferenceParams inferenceParams, InferStateArgs args, out IEnumerable<string>? extraOutputs)
+        protected override bool PostProcess(IInferenceParams inferenceParams, InferStateArgs args, out IEnumerable<string>? extraOutputs)
         {
             extraOutputs = null;
             if (_embed_inps.Count <= _consumedTokensCount)
@@ -179,7 +180,7 @@ namespace LLama
             return false;
         }
         /// <inheritdoc />
-        protected override void InferInternal(InferenceParams inferenceParams, InferStateArgs args)
+        protected override void InferInternal(IInferenceParams inferenceParams, InferStateArgs args)
         {
             if (_embeds.Count > 0)
             {
diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index bc3a242e..f5c1583e 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -1,5 +1,6 @@
 ﻿using LLama.Common;
 using LLama.Native;
+using LLama.Abstractions;
 using System;
 using System.Collections.Generic;
 using System.IO;
@@ -122,7 +123,7 @@ namespace LLama
         /// </summary>
         /// <param name="args"></param>
         /// <returns></returns>
-        protected override bool PostProcess(InferenceParams inferenceParams, InferStateArgs args, out IEnumerable<string>? extraOutputs)
+        protected override bool PostProcess(IInferenceParams inferenceParams, InferStateArgs args, out IEnumerable<string>? extraOutputs)
         {
             extraOutputs = null;
             if (_embed_inps.Count <= _consumedTokensCount)
@@ -166,7 +167,7 @@ namespace LLama
         }
 
         /// <inheritdoc />
-        protected override void InferInternal(InferenceParams inferenceParams, InferStateArgs args)
+        protected override void InferInternal(IInferenceParams inferenceParams, InferStateArgs args)
         {
             if (_embeds.Count > 0)
             {
diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs
index 88fa1695..dd0497c9 100644
--- a/LLama/LLamaStatelessExecutor.cs
+++ b/LLama/LLamaStatelessExecutor.cs
@@ -36,7 +36,7 @@ namespace LLama
         }
 
         /// <inheritdoc />
-        public IEnumerable<string> Infer(string text, InferenceParams? inferenceParams = null, CancellationToken cancellationToken = default)
+        public IEnumerable<string> Infer(string text, IInferenceParams? inferenceParams = null, CancellationToken cancellationToken = default)
         {
             cancellationToken.ThrowIfCancellationRequested();
             int n_past = 1;
@@ -123,7 +123,7 @@ namespace LLama
         }
 
         /// <inheritdoc />
-        public async IAsyncEnumerable<string> InferAsync(string text, InferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+        public async IAsyncEnumerable<string> InferAsync(string text, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
         {
             foreach (var result in Infer(text, inferenceParams, cancellationToken))
             {