From c9108f83117d8cd237464648f73ef2e1c094ef75 Mon Sep 17 00:00:00 2001
From: sa_ddam213 <sa_ddam213@live.com>
Date: Wed, 4 Oct 2023 10:40:53 +1300
Subject: [PATCH 1/7] Add service for managing Models and Model Contexts

---
 LLama.Web/Async/AsyncLock.cs        |  55 ++++++++
 LLama.Web/Common/LLamaOptions.cs    |   1 +
 LLama.Web/Common/ModelLoadType.cs   |  30 ++++
 LLama.Web/Common/ModelOptions.cs    | 206 +++++++++++++++-------------
 LLama.Web/LLamaModel.cs             | 106 ++++++++++++++
 LLama.Web/Services/IModelService.cs |  76 ++++++++++
 LLama.Web/Services/ModelService.cs  | 202 +++++++++++++++++++++++++++
 7 files changed, 582 insertions(+), 94 deletions(-)
 create mode 100644 LLama.Web/Async/AsyncLock.cs
 create mode 100644 LLama.Web/Common/ModelLoadType.cs
 create mode 100644 LLama.Web/LLamaModel.cs
 create mode 100644 LLama.Web/Services/IModelService.cs
 create mode 100644 LLama.Web/Services/ModelService.cs
diff --git a/LLama.Web/Async/AsyncLock.cs b/LLama.Web/Async/AsyncLock.cs
new file mode 100644
index 00000000..09ccb0f7
--- /dev/null
+++ b/LLama.Web/Async/AsyncLock.cs
@@ -0,0 +1,55 @@
+﻿namespace LLama.Web.Async
+{
+    /// <summary>
+    /// Asynchronous mutual-exclusion lock intended to be held via a <c>using</c> statement
+    /// </summary>
+    public sealed class AsyncLock
+    {
+        private readonly SemaphoreSlim _semaphore;
+        private readonly Task<IDisposable> _releaser;
+
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="AsyncLock"/> class.
+        /// </summary>
+        public AsyncLock()
+        {
+            _semaphore = new SemaphoreSlim(1, 1);
+            _releaser = Task.FromResult((IDisposable)new Releaser(this));
+        }
+
+
+        /// <summary>
+        /// Asynchronously acquires the lock; dispose the returned object to release it.
+        /// </summary>
+        /// <returns>A task whose result is the <see cref="IDisposable"/> that releases the lock</returns>
+        public Task<IDisposable> LockAsync()
+        {
+            var wait = _semaphore.WaitAsync();
+            if (wait.IsCompleted)
+                return _releaser;
+            // Wait still pending: hand out the same cached releaser once the semaphore wait completes
+            return wait.ContinueWith((_, state) => (IDisposable)state, _releaser.Result, CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default);
+        }
+
+
+        /// <summary>
+        /// IDisposable wrapper class to release the lock on dispose
+        /// </summary>
+        /// <seealso cref="IDisposable" />
+        private sealed class Releaser : IDisposable
+        {
+            private readonly AsyncLock _lockToRelease;
+
+            internal Releaser(AsyncLock lockToRelease)
+            {
+                _lockToRelease = lockToRelease;
+            }
+
+            public void Dispose()
+            {
+                _lockToRelease._semaphore.Release();
+            }
+        }
+    }
+}
diff --git a/LLama.Web/Common/LLamaOptions.cs b/LLama.Web/Common/LLamaOptions.cs
index 1ac0d829..a64b9635 100644
--- a/LLama.Web/Common/LLamaOptions.cs
+++ b/LLama.Web/Common/LLamaOptions.cs
@@ -2,6 +2,7 @@
 {
     public class LLamaOptions
     {
+        public ModelLoadType ModelLoadType { get; set; }
         public List<ModelOptions> Models { get; set; }
         public List<PromptOptions> Prompts { get; set; } = new List<PromptOptions>();
         public List<ParameterOptions> Parameters { get; set; } = new List<ParameterOptions>();
diff --git a/LLama.Web/Common/ModelLoadType.cs b/LLama.Web/Common/ModelLoadType.cs
new file mode 100644
index 00000000..9e1c77b7
--- /dev/null
+++ b/LLama.Web/Common/ModelLoadType.cs
@@ -0,0 +1,30 @@
+﻿namespace LLama.Web.Common
+{
+    /// <summary>
+    /// The type of model load caching to use
+    /// </summary>
+    public enum ModelLoadType
+    {
+
+        /// <summary>
+        /// Only one model will be loaded into memory at a time, any other models will be unloaded before the new one is loaded
+        /// </summary>
+        Single = 0,
+
+        /// <summary>
+        /// Multiple models will be loaded into memory, ensure you use the ModelConfigs to split the hardware resources
+        /// </summary>
+        Multiple = 1,
+
+        /// <summary>
+        /// The first model in the appsettings.json list will be preloaded into memory at app startup; as with Single, loading another model unloads the others
+        /// </summary>
+        PreloadSingle = 2,
+
+
+        /// <summary>
+        /// All models in the appsettings.json list will be preloaded into memory at app startup, ensure you use the ModelConfigs to split the hardware resources
+        /// </summary>
+        PreloadMultiple = 3,
+    }
+}
diff --git a/LLama.Web/Common/ModelOptions.cs b/LLama.Web/Common/ModelOptions.cs
index f06757e3..c6cf0988 100644
--- a/LLama.Web/Common/ModelOptions.cs
+++ b/LLama.Web/Common/ModelOptions.cs
@@ -3,105 +3,123 @@ using LLama.Abstractions;
 
 namespace LLama.Web.Common
 {
-    public class ModelOptions
-        : IModelParams
+    public class ModelOptions : IModelParams
     {
-      
+        /// <summary>
+        /// Model friendly name
+        /// </summary>
         public string Name { get; set; }
+
+        /// <summary>
+        /// Max context instances allowed per model
+        /// </summary>
         public int MaxInstances { get; set; }
 
+        /// <summary>
+        /// Model context size (n_ctx)
+        /// </summary>
+        public int ContextSize { get; set; } = 512;
+
+        /// <summary>
+        /// the GPU that is used for scratch and small tensors
+        /// </summary>
+        public int MainGpu { get; set; } = 0;
+
+        /// <summary>
+        /// if true, reduce VRAM usage at the cost of performance
+        /// </summary>
+        public bool LowVram { get; set; } = false;
+
+        /// <summary>
+        /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
+        /// </summary>
+        public int GpuLayerCount { get; set; } = 20;
+
+        /// <summary>
+        /// Seed for the random number generator (seed)
+        /// </summary>
+        public int Seed { get; set; } = 1686349486;
+
+        /// <summary>
+        /// Use f16 instead of f32 for memory kv (memory_f16)
+        /// </summary>
+        public bool UseFp16Memory { get; set; } = true;
+
+        /// <summary>
+        /// Use mmap for faster loads (use_mmap)
+        /// </summary>
+        public bool UseMemorymap { get; set; } = true;
+
+        /// <summary>
+        /// Use mlock to keep model in memory (use_mlock)
+        /// </summary>
+        public bool UseMemoryLock { get; set; } = false;
+
+        /// <summary>
+        /// Compute perplexity over the prompt (perplexity)
+        /// </summary>
+        public bool Perplexity { get; set; } = false;
+
+        /// <summary>
+        /// Model path (model)
+        /// </summary>
+        public string ModelPath { get; set; }
+
+        /// <summary>
+        /// model alias
+        /// </summary>
+        public string ModelAlias { get; set; } = "unknown";
 
-		/// <summary>
-		/// Model context size (n_ctx)
-		/// </summary>
-		public int ContextSize { get; set; } = 512;
-		/// <summary>
-		/// the GPU that is used for scratch and small tensors
-		/// </summary>
-		public int MainGpu { get; set; } = 0;
-		/// <summary>
-		/// if true, reduce VRAM usage at the cost of performance
-		/// </summary>
-		public bool LowVram { get; set; } = false;
-		/// <summary>
-		/// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
-		/// </summary>
-		public int GpuLayerCount { get; set; } = 20;
-		/// <summary>
-		/// Seed for the random number generator (seed)
-		/// </summary>
-		public int Seed { get; set; } = 1686349486;
-		/// <summary>
-		/// Use f16 instead of f32 for memory kv (memory_f16)
-		/// </summary>
-		public bool UseFp16Memory { get; set; } = true;
-		/// <summary>
-		/// Use mmap for faster loads (use_mmap)
-		/// </summary>
-		public bool UseMemorymap { get; set; } = true;
-		/// <summary>
-		/// Use mlock to keep model in memory (use_mlock)
-		/// </summary>
-		public bool UseMemoryLock { get; set; } = false;
-		/// <summary>
-		/// Compute perplexity over the prompt (perplexity)
-		/// </summary>
-		public bool Perplexity { get; set; } = false;
-		/// <summary>
-		/// Model path (model)
-		/// </summary>
-		public string ModelPath { get; set; }
-		/// <summary>
-		/// model alias
-		/// </summary>
-		public string ModelAlias { get; set; } = "unknown";
-		/// <summary>
-		/// lora adapter path (lora_adapter)
-		/// </summary>
-		public string LoraAdapter { get; set; } = string.Empty;
-		/// <summary>
-		/// base model path for the lora adapter (lora_base)
-		/// </summary>
-		public string LoraBase { get; set; } = string.Empty;
-		/// <summary>
-		/// Number of threads (-1 = autodetect) (n_threads)
-		/// </summary>
-		public int Threads { get; set; } = Math.Max(Environment.ProcessorCount / 2, 1);
-		/// <summary>
-		/// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
-		/// </summary>
-		public int BatchSize { get; set; } = 512;
-
-		/// <summary>
-		/// Whether to convert eos to newline during the inference.
-		/// </summary>
-		public bool ConvertEosToNewLine { get; set; } = false;
-
-		/// <summary>
-		/// Whether to use embedding mode. (embedding) Note that if this is set to true, 
-		/// The LLamaModel won't produce text response anymore.
-		/// </summary>
-		public bool EmbeddingMode { get; set; } = false;
-
-		/// <summary>
-		/// how split tensors should be distributed across GPUs
-		/// </summary>
-		public float[] TensorSplits { get; set; }
-
-		/// <summary>
-		/// RoPE base frequency
-		/// </summary>
-		public float RopeFrequencyBase { get; set; } = 10000.0f;
-
-		/// <summary>
-		/// RoPE frequency scaling factor
-		/// </summary>
-		public float RopeFrequencyScale { get; set; } = 1.0f;
-
-		/// <summary>
-		/// Use experimental mul_mat_q kernels
-		/// </summary>
-		public bool MulMatQ { get; set; }
+        /// <summary>
+        /// lora adapter path (lora_adapter)
+        /// </summary>
+        public string LoraAdapter { get; set; } = string.Empty;
+
+        /// <summary>
+        /// base model path for the lora adapter (lora_base)
+        /// </summary>
+        public string LoraBase { get; set; } = string.Empty;
+
+        /// <summary>
+        /// Number of threads (-1 = autodetect) (n_threads)
+        /// </summary>
+        public int Threads { get; set; } = Math.Max(Environment.ProcessorCount / 2, 1);
+
+        /// <summary>
+        /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
+        /// </summary>
+        public int BatchSize { get; set; } = 512;
+
+        /// <summary>
+        /// Whether to convert eos to newline during the inference.
+        /// </summary>
+        public bool ConvertEosToNewLine { get; set; } = false;
+
+        /// <summary>
+        /// Whether to use embedding mode. (embedding) Note that if this is set to true, 
+        /// The LLamaModel won't produce text response anymore.
+        /// </summary>
+        public bool EmbeddingMode { get; set; } = false;
+
+        /// <summary>
+        /// how split tensors should be distributed across GPUs
+        /// </summary>
+        public float[] TensorSplits { get; set; }
+
+        /// <summary>
+        /// RoPE base frequency
+        /// </summary>
+        public float RopeFrequencyBase { get; set; } = 10000.0f;
+
+        /// <summary>
+        /// RoPE frequency scaling factor
+        /// </summary>
+        public float RopeFrequencyScale { get; set; } = 1.0f;
+
+        /// <summary>
+        /// Use experimental mul_mat_q kernels
+        /// </summary>
+        public bool MulMatQ { get; set; }
 
         /// <summary>
         /// The encoding to use for models
diff --git a/LLama.Web/LLamaModel.cs b/LLama.Web/LLamaModel.cs
new file mode 100644
index 00000000..e500ba04
--- /dev/null
+++ b/LLama.Web/LLamaModel.cs
@@ -0,0 +1,106 @@
+﻿using LLama.Abstractions;
+using LLama.Web.Common;
+using System.Collections.Concurrent;
+
+namespace LLama.Web
+{
+    /// <summary>
+    /// Wrapper class for LLamaSharp LLamaWeights
+    /// </summary>
+    /// <seealso cref="System.IDisposable" />
+    public class LLamaModel : IDisposable
+    {
+        private readonly ModelOptions _config;
+        private readonly LLamaWeights _weights;
+        private readonly ConcurrentDictionary<string, LLamaContext> _contexts;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="LLamaModel"/> class and loads its weights via LLamaWeights.LoadFromFile.
+        /// </summary>
+        /// <param name="modelParams">The model parameters; also kept as the configuration for contexts created later.</param>
+        public LLamaModel(ModelOptions modelParams)
+        {
+            _config = modelParams;
+            _weights = LLamaWeights.LoadFromFile(modelParams);
+            _contexts = new ConcurrentDictionary<string, LLamaContext>();
+        }
+
+        /// <summary>
+        /// Gets the model configuration.
+        /// </summary>
+        public IModelParams ModelParams => _config;
+
+        /// <summary>
+        /// Gets the LLamaWeights
+        /// </summary>
+        public LLamaWeights LLamaWeights => _weights;
+
+
+        /// <summary>
+        /// Gets the context count.
+        /// </summary>
+        public int ContextCount => _contexts.Count;
+
+
+        /// <summary>
+        /// Creates a new context session on this model and registers it under the given name
+        /// </summary>
+        /// <param name="contextName">The unique context identifier</param>
+        /// <returns>The new LLamaContext, or null if the name was registered concurrently by another caller</returns>
+        /// <exception cref="Exception">Context exists, or the MaxInstances limit has been reached</exception>
+        public Task<LLamaContext> CreateContext(string contextName)
+        {
+            if (_contexts.TryGetValue(contextName, out var context))
+                throw new Exception($"Context with id {contextName} already exists.");
+            if (_config.MaxInstances > -1 && ContextCount >= _config.MaxInstances)
+                throw new Exception($"Maximum model instances reached");
+
+            context = _weights.CreateContext(_config);
+            if (_contexts.TryAdd(contextName, context))
+                return Task.FromResult(context);
+            // Lost a race for this name: dispose the unregistered context instead of leaking it
+            context?.Dispose();
+            return Task.FromResult<LLamaContext>(null);
+        }
+
+        /// <summary>
+        /// Gets a context belonging to this model
+        /// </summary>
+        /// <param name="contextName">The unique context identifier</param>
+        /// <returns>The LLamaContext registered under contextName, or null if there is none</returns>
+        public Task<LLamaContext> GetContext(string contextName)
+        {
+            if (_contexts.TryGetValue(contextName, out var context))
+                return Task.FromResult(context);
+
+            return Task.FromResult<LLamaContext>(null);
+        }
+
+        /// <summary>
+        /// Removes a context from this model and disposes it
+        /// </summary>
+        /// <param name="contextName">The unique context identifier</param>
+        /// <returns>true if a context with that name was removed, otherwise false</returns>
+        public Task<bool> RemoveContext(string contextName)
+        {
+            if (!_contexts.TryRemove(contextName, out var context))
+                return Task.FromResult(false);
+
+            context?.Dispose();
+            return Task.FromResult(true);
+        }
+
+
+        /// <summary>
+        /// Disposes every context still registered on this model, then disposes the model weights.
+        /// </summary>
+        public void Dispose()
+        {
+            foreach (var context in _contexts.Values)
+            {
+                context?.Dispose();
+            }
+            _weights.Dispose();
+        }
+    }
+}
diff --git a/LLama.Web/Services/IModelService.cs b/LLama.Web/Services/IModelService.cs
new file mode 100644
index 00000000..0a98f8f4
--- /dev/null
+++ b/LLama.Web/Services/IModelService.cs
@@ -0,0 +1,76 @@
+﻿using LLama.Web.Common;
+
+namespace LLama.Web.Services
+{
+    /// <summary>
+    /// Service for managing language models and the contexts created on them
+    /// </summary>
+    public interface IModelService
+    {
+        /// <summary>
+        /// Gets the loaded model with the specified name.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        Task<LLamaModel> GetModel(string modelName);
+
+
+        /// <summary>
+        /// Loads a model from a <see cref="ModelOptions"/> object.
+        /// </summary>
+        /// <param name="modelOptions">The model configuration.</param>
+        Task<LLamaModel> LoadModel(ModelOptions modelOptions);
+
+
+        /// <summary>
+        /// Loads the models found in appsettings.json
+        /// </summary>
+        Task LoadModels();
+
+
+        /// <summary>
+        /// Unloads the model with the specified name.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        Task UnloadModel(string modelName);
+
+
+        /// <summary>
+        /// Unloads all models.
+        /// </summary>
+        Task UnloadModels();
+
+
+        /// <summary>
+        /// Gets the context with the specified identifier from the specified model
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The context identifier.</param>
+        Task<LLamaContext> GetContext(string modelName, string contextName);
+
+
+        /// <summary>
+        /// Removes the context with the specified identifier from the specified model.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The context identifier.</param>
+        Task<bool> RemoveContext(string modelName, string contextName);
+
+
+        /// <summary>
+        /// Creates a context with the specified identifier on the specified model.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The context identifier.</param>
+        Task<LLamaContext> CreateContext(string modelName, string contextName);
+
+
+        /// <summary>
+        /// Gets or creates the model and the context.
+        /// This will load a model from disk if not already loaded, and also create the context
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The context identifier.</param>
+        /// <returns>Both loaded Model and Context</returns>
+        Task<(LLamaModel, LLamaContext)> GetOrCreateModelAndContext(string modelName, string contextName);
+    }
+}
\ No newline at end of file
diff --git a/LLama.Web/Services/ModelService.cs b/LLama.Web/Services/ModelService.cs
new file mode 100644
index 00000000..16365a5d
--- /dev/null
+++ b/LLama.Web/Services/ModelService.cs
@@ -0,0 +1,202 @@
+﻿using LLama.Web.Async;
+using LLama.Web.Common;
+using System.Collections.Concurrent;
+
+namespace LLama.Web.Services
+{
+
+    /// <summary>
+    /// Service for handling Models, Weights and Contexts
+    /// </summary>
+    public class ModelService : IModelService
+    {
+        private readonly AsyncLock _modelLock;
+        private readonly AsyncLock _contextLock;
+        private readonly LLamaOptions _configuration;
+        private readonly ConcurrentDictionary<string, LLamaModel> _modelInstances;
+
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="ModelService"/> class.
+        /// </summary>
+        /// <param name="configuration">The LLama configuration; supplies the
+        /// <see cref="ModelLoadType"/> and the list of model options.</param>
+        public ModelService(LLamaOptions configuration)
+        {
+            _modelLock = new AsyncLock();
+            _contextLock = new AsyncLock();
+            _configuration = configuration;
+            _modelInstances = new ConcurrentDictionary<string, LLamaModel>();
+        }
+
+
+        /// <summary>
+        /// Loads a model with the provided configuration, or returns the model already loaded under the same name.
+        /// </summary>
+        /// <param name="modelOptions">The model configuration.</param>
+        /// <returns>The loaded (or previously loaded) model</returns>
+        public async Task<LLamaModel> LoadModel(ModelOptions modelOptions)
+        {
+            if (_modelInstances.TryGetValue(modelOptions.Name, out var existingModel))
+                return existingModel;
+            // Not loaded yet: serialize loading, then look again once the lock is held
+            using (await _modelLock.LockAsync())
+            {
+                if (_modelInstances.TryGetValue(modelOptions.Name, out var model))
+                    return model;
+
+                // If in single mode unload any other models
+                if (_configuration.ModelLoadType == ModelLoadType.Single
+                 || _configuration.ModelLoadType == ModelLoadType.PreloadSingle)
+                    await UnloadModels();
+
+
+                model = new LLamaModel(modelOptions);
+                _modelInstances.TryAdd(modelOptions.Name, model);
+                return model;
+            }
+        }
+
+
+        /// <summary>
+        /// Preloads the configured models; does nothing unless the load type is PreloadSingle or PreloadMultiple.
+        /// </summary>
+        public async Task LoadModels()
+        {
+            if (_configuration.ModelLoadType == ModelLoadType.Single
+             || _configuration.ModelLoadType == ModelLoadType.Multiple)
+                return;
+
+            foreach (var modelConfig in _configuration.Models)
+            {
+                await LoadModel(modelConfig);
+
+                // Only preload the first model if in PreloadSingle mode
+                if (_configuration.ModelLoadType == ModelLoadType.PreloadSingle)
+                    break;
+            }
+        }
+
+
+        /// <summary>
+        /// Removes the model with the specified name from the loaded set and disposes it.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <returns>A completed task; its bool result (true if a model was removed) is hidden by the declared Task type</returns>
+        public Task UnloadModel(string modelName)
+        {
+            if (_modelInstances.TryRemove(modelName, out var model))
+            {
+                model?.Dispose();
+                return Task.FromResult(true);
+            }
+            return Task.FromResult(false);
+        }
+
+
+
+        /// <summary>
+        /// Unloads all models by calling UnloadModel for every currently loaded model name.
+        /// </summary>
+        public async Task UnloadModels()
+        {
+            foreach (var modelName in _modelInstances.Keys)
+            {
+                await UnloadModel(modelName);
+            }
+        }
+
+
+        /// <summary>
+        /// Gets a model by name.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <returns>The loaded model, or null if no model with that name is loaded</returns>
+        public Task<LLamaModel> GetModel(string modelName)
+        {
+            _modelInstances.TryGetValue(modelName, out var model);
+            return Task.FromResult(model);
+        }
+
+
+        /// <summary>
+        /// Gets a context from the specified model.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The context identifier.</param>
+        /// <returns>The context, or null if the model has no context with that identifier</returns>
+        /// <exception cref="System.Exception">Model not found</exception>
+        public async Task<LLamaContext> GetContext(string modelName, string contextName)
+        {
+            if (!_modelInstances.TryGetValue(modelName, out var model))
+                throw new Exception("Model not found");
+
+            return await model.GetContext(contextName);
+        }
+
+
+        /// <summary>
+        /// Creates a context on the specified model while holding the service's context lock.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The context identifier.</param>
+        /// <returns>The result of the model's CreateContext call</returns>
+        /// <exception cref="System.Exception">Model not found</exception>
+        public async Task<LLamaContext> CreateContext(string modelName, string contextName)
+        {
+            if (!_modelInstances.TryGetValue(modelName, out var model))
+                throw new Exception("Model not found");
+
+            using (await _contextLock.LockAsync())
+            {
+                return await model.CreateContext(contextName);
+            }
+        }
+
+
+        /// <summary>
+        /// Removes a context from the specified model while holding the service's context lock.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The context identifier.</param>
+        /// <returns>true if the model removed a context with that identifier, otherwise false</returns>
+        /// <exception cref="System.Exception">Model not found</exception>
+        public async Task<bool> RemoveContext(string modelName, string contextName)
+        {
+            if (!_modelInstances.TryGetValue(modelName, out var model))
+                throw new Exception("Model not found");
+
+            using (await _contextLock.LockAsync())
+            {
+                return await model.RemoveContext(contextName);
+            }
+        }
+
+
+        /// <summary>
+        /// Gets the named model (loading it from its configured options if needed), then gets or creates the named context on it.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The context identifier.</param>
+        /// <returns>The model together with the existing or newly created context</returns>
+        /// <exception cref="System.Exception">Model option '{modelName}' not found</exception>
+        public async Task<(LLamaModel, LLamaContext)> GetOrCreateModelAndContext(string modelName, string contextName)
+        {
+            if (!_modelInstances.TryGetValue(modelName, out var model))
+            {
+                // Get model configuration (a missing Models list is treated like an unknown model name)
+                var modelConfig = _configuration.Models?.FirstOrDefault(x => x.Name == modelName);
+                if (modelConfig is null)
+                    throw new Exception($"Model option '{modelName}' not found");
+
+                // Load Model
+                model = await LoadModel(modelConfig);
+            }
+
+            // Get or Create Context under the context lock so concurrent callers cannot both miss and both create
+            using (await _contextLock.LockAsync())
+                return (model, await model.GetContext(contextName) ?? await model.CreateContext(contextName));
+        }
+
+    }
+}

From 44f1b91c292eba68df285005e2e763484a45fc85 Mon Sep 17 00:00:00 2001
From: sa_ddam213 <sa_ddam213@live.com>
Date: Wed, 4 Oct 2023 12:57:15 +1300
Subject: [PATCH 2/7] Update Web to support version 0.5.1

---
 LLama.Web/Async/AsyncGuard.cs                 | 107 +++++++++
 LLama.Web/Common/InferenceOptions.cs          | 101 ++++++++
 LLama.Web/Common/LLamaOptions.cs              |   9 -
 LLama.Web/Common/ParameterOptions.cs          | 105 ---------
 LLama.Web/Common/PromptOptions.cs             |  11 -
 LLama.Web/Common/SessionOptions.cs            |  14 ++
 LLama.Web/Extensioms.cs                       |  54 +++++
 LLama.Web/Hubs/ISessionClient.cs              |   1 -
 LLama.Web/Hubs/SessionConnectionHub.cs        |  57 ++---
 LLama.Web/LLama.Web.csproj                    |   4 +
 LLama.Web/{ => Models}/LLamaModel.cs          |   4 +-
 LLama.Web/Models/ModelSession.cs              | 138 ++++++++---
 LLama.Web/Models/ResponseFragment.cs          |  18 --
 LLama.Web/Models/TokenModel.cs                |  24 ++
 LLama.Web/Pages/Executor/Instruct.cshtml      |  96 --------
 LLama.Web/Pages/Executor/Instruct.cshtml.cs   |  34 ---
 LLama.Web/Pages/Executor/Instruct.cshtml.css  |   4 -
 LLama.Web/Pages/Executor/Interactive.cshtml   |  96 --------
 .../Pages/Executor/Interactive.cshtml.cs      |  34 ---
 .../Pages/Executor/Interactive.cshtml.css     |   4 -
 LLama.Web/Pages/Executor/Stateless.cshtml     |  97 --------
 LLama.Web/Pages/Executor/Stateless.cshtml.cs  |  34 ---
 LLama.Web/Pages/Executor/Stateless.cshtml.css |   4 -
 LLama.Web/Pages/Index.cshtml                  | 119 +++++++++-
 LLama.Web/Pages/Index.cshtml.cs               |  25 +-
 LLama.Web/Pages/Shared/_ChatTemplates.cshtml  |  24 +-
 LLama.Web/Pages/Shared/_Layout.cshtml         |  32 +--
 LLama.Web/Pages/Shared/_Parameters.cshtml     | 137 +++++++++++
 LLama.Web/Program.cs                          |   5 +-
 .../Services/ConnectionSessionService.cs      |  94 --------
 LLama.Web/Services/IModelService.cs           |   1 +
 LLama.Web/Services/IModelSessionService.cs    |  84 ++++++-
 LLama.Web/Services/ModelLoaderService.cs      |  42 ++++
 LLama.Web/Services/ModelService.cs            |   1 +
 LLama.Web/Services/ModelSessionService.cs     | 216 ++++++++++++++++++
 LLama.Web/appsettings.json                    |  60 ++---
 LLama.Web/wwwroot/css/site.css                |  25 +-
 LLama.Web/wwwroot/js/sessionConnectionChat.js | 139 +++++++----
 LLama.Web/wwwroot/js/site.js                  |   8 +-
 39 files changed, 1208 insertions(+), 854 deletions(-)
 create mode 100644 LLama.Web/Async/AsyncGuard.cs
 create mode 100644 LLama.Web/Common/InferenceOptions.cs
 delete mode 100644 LLama.Web/Common/ParameterOptions.cs
 delete mode 100644 LLama.Web/Common/PromptOptions.cs
 create mode 100644 LLama.Web/Common/SessionOptions.cs
 create mode 100644 LLama.Web/Extensioms.cs
 rename LLama.Web/{ => Models}/LLamaModel.cs (98%)
 delete mode 100644 LLama.Web/Models/ResponseFragment.cs
 create mode 100644 LLama.Web/Models/TokenModel.cs
 delete mode 100644 LLama.Web/Pages/Executor/Instruct.cshtml
 delete mode 100644 LLama.Web/Pages/Executor/Instruct.cshtml.cs
 delete mode 100644 LLama.Web/Pages/Executor/Instruct.cshtml.css
 delete mode 100644 LLama.Web/Pages/Executor/Interactive.cshtml
 delete mode 100644 LLama.Web/Pages/Executor/Interactive.cshtml.cs
 delete mode 100644 LLama.Web/Pages/Executor/Interactive.cshtml.css
 delete mode 100644 LLama.Web/Pages/Executor/Stateless.cshtml
 delete mode 100644 LLama.Web/Pages/Executor/Stateless.cshtml.cs
 delete mode 100644 LLama.Web/Pages/Executor/Stateless.cshtml.css
 create mode 100644 LLama.Web/Pages/Shared/_Parameters.cshtml
 delete mode 100644 LLama.Web/Services/ConnectionSessionService.cs
 create mode 100644 LLama.Web/Services/ModelLoaderService.cs
 create mode 100644 LLama.Web/Services/ModelSessionService.cs

diff --git a/LLama.Web/Async/AsyncGuard.cs b/LLama.Web/Async/AsyncGuard.cs
new file mode 100644
index 00000000..ff6b6c43
--- /dev/null
+++ b/LLama.Web/Async/AsyncGuard.cs
@@ -0,0 +1,107 @@
+﻿using System.Collections.Concurrent;
+
+namespace LLama.Web.Async
+{
+
+    /// <summary>
+    /// Async/thread-safe single-slot guard; its state lives in this class's own dictionary (fixed key 0), separate from the inherited keyed overloads
+    /// </summary>
+    /// <seealso cref="AsyncGuard{T}" />
+    public class AsyncGuard : AsyncGuard<byte>
+    {
+        private readonly byte _key;
+        private readonly ConcurrentDictionary<byte, bool> _lockData;
+
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="AsyncGuard"/> class.
+        /// </summary>
+        public AsyncGuard()
+        {
+            _key = 0;
+            _lockData = new ConcurrentDictionary<byte, bool>();
+        }
+
+
+        /// <summary>
+        /// Tries to enter the guard without waiting.
+        /// </summary>
+        /// <returns>true if able to enter the guard, false if already guarded</returns>
+        public bool Guard()
+        {
+            return _lockData.TryAdd(_key, true);
+        }
+
+
+        /// <summary>
+        /// Releases the guard.
+        /// </summary>
+        /// <returns>true if a guard was held and has been released, false if nothing was guarded</returns>
+        public bool Release()
+        {
+            return _lockData.TryRemove(_key, out _);
+        }
+
+
+        /// <summary>
+        /// Determines whether this instance is guarded.
+        /// </summary>
+        /// <returns>
+        ///   <c>true</c> if this instance is guarded; otherwise, <c>false</c>.
+        /// </returns>
+        public bool IsGuarded()
+        {
+            return _lockData.ContainsKey(_key);
+        }
+    }
+
+
+    public class AsyncGuard<T>
+    {
+        private readonly ConcurrentDictionary<T, bool> _lockData;
+
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="AsyncGuard{T}"/> class.
+        /// </summary>
+        public AsyncGuard()
+        {
+            _lockData = new ConcurrentDictionary<T, bool>();
+        }
+
+
+        /// <summary>
+        /// Tries to enter a guard for the specified value without waiting.
+        /// </summary>
+        /// <param name="value">The value.</param>
+        /// <returns>true if able to enter a guard for this value, false if this value is already guarded</returns>
+        public bool Guard(T value)
+        {
+            return _lockData.TryAdd(value, true);
+        }
+
+
+        /// <summary>
+        /// Releases the guard on the specified value.
+        /// </summary>
+        /// <param name="value">The value.</param>
+        /// <returns>true if the value was guarded and has been released, false if it was not guarded</returns>
+        public bool Release(T value)
+        {
+            return _lockData.TryRemove(value, out _);
+        }
+
+
+        /// <summary>
+        /// Determines whether the specified value is guarded.
+        /// </summary>
+        /// <param name="value">The value.</param>
+        /// <returns>
+        ///   <c>true</c> if the specified value is guarded; otherwise, <c>false</c>.
+        /// </returns>
+        public bool IsGuarded(T value)
+        {
+            return _lockData.ContainsKey(value);
+        }
+    }
+}
diff --git a/LLama.Web/Common/InferenceOptions.cs b/LLama.Web/Common/InferenceOptions.cs
new file mode 100644
index 00000000..c2420af3
--- /dev/null
+++ b/LLama.Web/Common/InferenceOptions.cs
@@ -0,0 +1,101 @@
+﻿using LLama.Common;
+using LLama.Abstractions;
+using LLama.Native;
+
+namespace LLama.Web.Common
+{
+    public class InferenceOptions : IInferenceParams
+    {
+        /// <summary>
+        /// number of tokens to keep from initial prompt
+        /// </summary>
+        public int TokensKeep { get; set; } = 0;
+        /// <summary>
+        /// how many new tokens to predict (n_predict), set to -1 to infinitely generate the response
+        /// until it completes.
+        /// </summary>
+        public int MaxTokens { get; set; } = -1;
+        /// <summary>
+        /// logit bias for specific tokens
+        /// </summary>
+        public Dictionary<int, float>? LogitBias { get; set; } = null;
+
+        /// <summary>
+        /// Sequences where the model will stop generating further tokens.
+        /// </summary>
+        public IEnumerable<string> AntiPrompts { get; set; } = Array.Empty<string>();
+        /// <summary>
+        /// path to file for saving/loading model eval state
+        /// </summary>
+        public string PathSession { get; set; } = string.Empty;
+        /// <summary>
+        /// string to suffix user inputs with
+        /// </summary>
+        public string InputSuffix { get; set; } = string.Empty;
+        /// <summary>
+        /// string to prefix user inputs with
+        /// </summary>
+        public string InputPrefix { get; set; } = string.Empty;
+        /// <summary>
+        /// 0 or lower to use vocab size
+        /// </summary>
+        public int TopK { get; set; } = 40;
+        /// <summary>
+        /// 1.0 = disabled
+        /// </summary>
+        public float TopP { get; set; } = 0.95f;
+        /// <summary>
+        /// 1.0 = disabled
+        /// </summary>
+        public float TfsZ { get; set; } = 1.0f;
+        /// <summary>
+        /// 1.0 = disabled
+        /// </summary>
+        public float TypicalP { get; set; } = 1.0f;
+        /// <summary>
+        /// 1.0 = disabled
+        /// </summary>
+        public float Temperature { get; set; } = 0.8f;
+        /// <summary>
+        /// 1.0 = disabled
+        /// </summary>
+        public float RepeatPenalty { get; set; } = 1.1f;
+        /// <summary>
+        /// last n tokens to penalize (0 = disable penalty, -1 = context size) (repeat_last_n)
+        /// </summary>
+        public int RepeatLastTokensCount { get; set; } = 64;
+        /// <summary>
+        /// frequency penalty coefficient
+        /// 0.0 = disabled
+        /// </summary>
+        public float FrequencyPenalty { get; set; } = .0f;
+        /// <summary>
+        /// presence penalty coefficient
+        /// 0.0 = disabled
+        /// </summary>
+        public float PresencePenalty { get; set; } = .0f;
+        /// <summary>
+        /// Mirostat uses tokens instead of words.
+        /// algorithm described in the paper https://arxiv.org/abs/2007.14966.
+        /// 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
+        /// </summary>
+        public MirostatType Mirostat { get; set; } = MirostatType.Disable;
+        /// <summary>
+        /// target entropy
+        /// </summary>
+        public float MirostatTau { get; set; } = 5.0f;
+        /// <summary>
+        /// learning rate
+        /// </summary>
+        public float MirostatEta { get; set; } = 0.1f;
+        /// <summary>
+        /// consider newlines as a repeatable token (penalize_nl)
+        /// </summary>
+        public bool PenalizeNL { get; set; } = true;
+
+        /// <summary>
+        /// A grammar to constrain possible tokens
+        /// </summary>
+        public SafeLLamaGrammarHandle Grammar { get; set; } = null;
+    }
+}
diff --git a/LLama.Web/Common/LLamaOptions.cs b/LLama.Web/Common/LLamaOptions.cs
index a64b9635..4a1d6e0a 100644
--- a/LLama.Web/Common/LLamaOptions.cs
+++ b/LLama.Web/Common/LLamaOptions.cs
@@ -4,18 +4,9 @@
     {
         public ModelLoadType ModelLoadType { get; set; }
         public List<ModelOptions> Models { get; set; }
-        public List<PromptOptions> Prompts { get; set; } = new List<PromptOptions>();
-        public List<ParameterOptions> Parameters { get; set; } = new List<ParameterOptions>();
 
         public void Initialize()
         {
-            foreach (var prompt in Prompts)
-            {
-                if (File.Exists(prompt.Path))
-                {
-                    prompt.Prompt = File.ReadAllText(prompt.Path).Trim();
-                }
-            }
         }
     }
 }
diff --git a/LLama.Web/Common/ParameterOptions.cs b/LLama.Web/Common/ParameterOptions.cs
deleted file mode 100644
index f78aa861..00000000
--- a/LLama.Web/Common/ParameterOptions.cs
+++ /dev/null
@@ -1,105 +0,0 @@
-﻿using LLama.Common;
-using LLama.Abstractions;
-using LLama.Native;
-
-namespace LLama.Web.Common
-{
-    public class ParameterOptions : IInferenceParams
-	{
-        public string Name { get; set; }
-
-
-
-		/// <summary>
-		/// number of tokens to keep from initial prompt
-		/// </summary>
-		public int TokensKeep { get; set; } = 0;
-		/// <summary>
-		/// how many new tokens to predict (n_predict), set to -1 to inifinitely generate response
-		/// until it complete.
-		/// </summary>
-		public int MaxTokens { get; set; } = -1;
-		/// <summary>
-		/// logit bias for specific tokens
-		/// </summary>
-		public Dictionary<int, float>? LogitBias { get; set; } = null;
-
-		/// <summary>
-		/// Sequences where the model will stop generating further tokens.
-		/// </summary>
-		public IEnumerable<string> AntiPrompts { get; set; } = Array.Empty<string>();
-		/// <summary>
-		/// path to file for saving/loading model eval state
-		/// </summary>
-		public string PathSession { get; set; } = string.Empty;
-		/// <summary>
-		/// string to suffix user inputs with
-		/// </summary>
-		public string InputSuffix { get; set; } = string.Empty;
-		/// <summary>
-		/// string to prefix user inputs with
-		/// </summary>
-		public string InputPrefix { get; set; } = string.Empty;
-		/// <summary>
-		///  0 or lower to use vocab size
-		/// </summary>
-		public int TopK { get; set; } = 40;
-		/// <summary>
-		/// 1.0 = disabled
-		/// </summary>
-		public float TopP { get; set; } = 0.95f;
-		/// <summary>
-		/// 1.0 = disabled
-		/// </summary>
-		public float TfsZ { get; set; } = 1.0f;
-		/// <summary>
-		/// 1.0 = disabled
-		/// </summary>
-		public float TypicalP { get; set; } = 1.0f;
-		/// <summary>
-		/// 1.0 = disabled
-		/// </summary>
-		public float Temperature { get; set; } = 0.8f;
-		/// <summary>
-		/// 1.0 = disabled
-		/// </summary>
-		public float RepeatPenalty { get; set; } = 1.1f;
-		/// <summary>
-		/// last n tokens to penalize (0 = disable penalty, -1 = context size) (repeat_last_n)
-		/// </summary>
-		public int RepeatLastTokensCount { get; set; } = 64;
-		/// <summary>
-		/// frequency penalty coefficient
-		/// 0.0 = disabled
-		/// </summary>
-		public float FrequencyPenalty { get; set; } = .0f;
-		/// <summary>
-		/// presence penalty coefficient
-		/// 0.0 = disabled
-		/// </summary>
-		public float PresencePenalty { get; set; } = .0f;
-		/// <summary>
-		/// Mirostat uses tokens instead of words.
-		/// algorithm described in the paper https://arxiv.org/abs/2007.14966.
-		/// 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
-		/// </summary>
-		public MirostatType Mirostat { get; set; } = MirostatType.Disable;
-		/// <summary>
-		/// target entropy
-		/// </summary>
-		public float MirostatTau { get; set; } = 5.0f;
-		/// <summary>
-		/// learning rate
-		/// </summary>
-		public float MirostatEta { get; set; } = 0.1f;
-		/// <summary>
-		/// consider newlines as a repeatable token (penalize_nl)
-		/// </summary>
-		public bool PenalizeNL { get; set; } = true;
-
-		/// <summary>
-		/// A grammar to constrain possible tokens
-		/// </summary>
-        public SafeLLamaGrammarHandle Grammar { get; set; } = null;
-    }
-}
diff --git a/LLama.Web/Common/PromptOptions.cs b/LLama.Web/Common/PromptOptions.cs
deleted file mode 100644
index 4e44a5d1..00000000
--- a/LLama.Web/Common/PromptOptions.cs
+++ /dev/null
@@ -1,11 +0,0 @@
-﻿namespace LLama.Web.Common
-{
-    public class PromptOptions
-    {
-        public string Name { get; set; }
-        public string Path { get; set; }
-        public string Prompt { get; set; }
-        public List<string> AntiPrompt { get; set; }
-        public List<string> OutputFilter { get; set; }
-    }
-}
diff --git a/LLama.Web/Common/SessionOptions.cs b/LLama.Web/Common/SessionOptions.cs
new file mode 100644
index 00000000..34386955
--- /dev/null
+++ b/LLama.Web/Common/SessionOptions.cs
@@ -0,0 +1,14 @@
+﻿namespace LLama.Web.Common
+{
+    public class SessionOptions
+    {
+        public string Model { get; set; } // model name; surfaced as ModelSession.ModelName
+        public string Prompt { get; set; } // initial prompt; ModelSession.InitializePrompt skips it when empty or when the executor is Stateless
+        // Each CSV/list pair below is merged and de-duplicated by Extensioms.GetAntiPrompts / GetOutputFilters.
+        public string AntiPrompt { get; set; } // comma-separated
+        public List<string> AntiPrompts { get; set; }
+        public string OutputFilter { get; set; } // comma-separated
+        public List<string> OutputFilters { get; set; }
+        public LLamaExecutorType ExecutorType { get; set; } // selects which executor ModelSession creates
+    }
+}
diff --git a/LLama.Web/Extensioms.cs b/LLama.Web/Extensioms.cs
new file mode 100644
index 00000000..50bb55c4
--- /dev/null
+++ b/LLama.Web/Extensioms.cs
@@ -0,0 +1,54 @@
+﻿using LLama.Web.Common;
+
+namespace LLama.Web
+{
+    public static class Extensioms
+    {
+        /// <summary>
+        /// Combines the AntiPrompts list and the AntiPrompt CSV string.
+        /// </summary>
+        /// <param name="sessionConfig">The session configuration.</param>
+        /// <returns>Combined AntiPrompts with duplicates removed</returns>
+        public static List<string> GetAntiPrompts(this Common.SessionOptions sessionConfig)
+        {
+            return CombineCSV(sessionConfig.AntiPrompts, sessionConfig.AntiPrompt);
+        }
+
+        /// <summary>
+        /// Combines the OutputFilters list and the OutputFilter CSV string.
+        /// </summary>
+        /// <param name="sessionConfig">The session configuration.</param>
+        /// <returns>Combined OutputFilters with duplicates removed</returns>
+        public static List<string> GetOutputFilters(this Common.SessionOptions sessionConfig)
+        {
+            return CombineCSV(sessionConfig.OutputFilters, sessionConfig.OutputFilter);
+        }
+
+        /// <summary>
+        /// Combines a string list and a CSV string (CSV entries first) and removes duplicates.
+        /// </summary>
+        /// <param name="list">The list; may be null or empty, in which case only the CSV entries are used.</param>
+        /// <param name="csv">The CSV; may be null or empty.</param>
+        /// <returns>Combined list with duplicates removed; never null</returns>
+        private static List<string> CombineCSV(List<string> list, string csv)
+        {
+            // `list?.Count == 0` is false when list is null, which would hand null to Concat and throw.
+            var results = list is null || list.Count == 0
+                ? CommaSeperatedToList(csv)
+                : CommaSeperatedToList(csv).Concat(list);
+            return results.Distinct().ToList();
+        }
+
+        /// <summary>Splits a CSV string into trimmed, non-empty entries; null or empty input yields an empty list.</summary>
+        private static List<string> CommaSeperatedToList(string value)
+        {
+            if (string.IsNullOrEmpty(value))
+                return new List<string>();
+
+            return value.Split(",", StringSplitOptions.RemoveEmptyEntries)
+                 .Select(x => x.Trim())
+                 .Where(x => x.Length > 0)
+                 .ToList();
+        }
+    }
+}
diff --git a/LLama.Web/Hubs/ISessionClient.cs b/LLama.Web/Hubs/ISessionClient.cs
index 9e9dc0f1..92302b21 100644
--- a/LLama.Web/Hubs/ISessionClient.cs
+++ b/LLama.Web/Hubs/ISessionClient.cs
@@ -6,7 +6,6 @@ namespace LLama.Web.Hubs
     public interface ISessionClient
     {
         Task OnStatus(string connectionId, SessionConnectionStatus status);
-        Task OnResponse(ResponseFragment fragment);
         Task OnError(string error);
     }
 }
diff --git a/LLama.Web/Hubs/SessionConnectionHub.cs b/LLama.Web/Hubs/SessionConnectionHub.cs
index 080866c6..730d4e87 100644
--- a/LLama.Web/Hubs/SessionConnectionHub.cs
+++ b/LLama.Web/Hubs/SessionConnectionHub.cs
@@ -2,16 +2,15 @@
 using LLama.Web.Models;
 using LLama.Web.Services;
 using Microsoft.AspNetCore.SignalR;
-using System.Diagnostics;
 
 namespace LLama.Web.Hubs
 {
     public class SessionConnectionHub : Hub<ISessionClient>
     {
         private readonly ILogger<SessionConnectionHub> _logger;
-        private readonly ConnectionSessionService _modelSessionService;
+        private readonly IModelSessionService _modelSessionService;
 
-        public SessionConnectionHub(ILogger<SessionConnectionHub> logger, ConnectionSessionService modelSessionService)
+        public SessionConnectionHub(ILogger<SessionConnectionHub> logger, IModelSessionService modelSessionService)
         {
             _logger = logger;
             _modelSessionService = modelSessionService;
@@ -27,29 +26,27 @@ namespace LLama.Web.Hubs
         }
 
 
-        public override async Task OnDisconnectedAsync(Exception? exception)
+        public override async Task OnDisconnectedAsync(Exception exception)
         {
             _logger.Log(LogLevel.Information, "[OnDisconnectedAsync], Id: {0}", Context.ConnectionId);
 
             // Remove connections session on dissconnect
-            await _modelSessionService.RemoveAsync(Context.ConnectionId);
+            await _modelSessionService.CloseAsync(Context.ConnectionId);
             await base.OnDisconnectedAsync(exception);
         }
 
 
         [HubMethodName("LoadModel")]
-        public async Task OnLoadModel(LLamaExecutorType executorType, string modelName, string promptName, string parameterName)
+        public async Task OnLoadModel(Common.SessionOptions sessionConfig, InferenceOptions inferenceConfig)
         {
-            _logger.Log(LogLevel.Information, "[OnLoadModel] - Load new model, Connection: {0}, Model: {1}, Prompt: {2}, Parameter: {3}", Context.ConnectionId, modelName, promptName, parameterName);
-
-            // Remove existing connections session
-            await _modelSessionService.RemoveAsync(Context.ConnectionId);
+            _logger.Log(LogLevel.Information, "[OnLoadModel] - Load new model, Connection: {0}", Context.ConnectionId);
+            await _modelSessionService.CloseAsync(Context.ConnectionId);
 
             // Create model session
-            var modelSessionResult = await _modelSessionService.CreateAsync(executorType, Context.ConnectionId, modelName, promptName, parameterName);
-            if (modelSessionResult.HasError)
+            var modelSession = await _modelSessionService.CreateAsync(Context.ConnectionId, sessionConfig, inferenceConfig);
+            if (modelSession is null)
             {
-                await Clients.Caller.OnError(modelSessionResult.Error);
+                await Clients.Caller.OnError("Failed to create model session");
                 return;
             }
 
@@ -59,40 +56,12 @@ namespace LLama.Web.Hubs
 
 
         [HubMethodName("SendPrompt")]
-        public async Task OnSendPrompt(string prompt)
+        public IAsyncEnumerable<TokenModel> OnSendPrompt(string prompt, InferenceOptions inferConfig, CancellationToken cancellationToken)
         {
             _logger.Log(LogLevel.Information, "[OnSendPrompt] - New prompt received, Connection: {0}", Context.ConnectionId);
 
-            // Get connections session
-            var modelSession = await _modelSessionService.GetAsync(Context.ConnectionId);
-            if (modelSession is null)
-            {
-                await Clients.Caller.OnError("No model has been loaded");
-                return;
-            }
-
-
-            // Create unique response id
-            var responseId = Guid.NewGuid().ToString();
-
-            // Send begin of response
-            await Clients.Caller.OnResponse(new ResponseFragment(responseId, isFirst: true));
-
-            // Send content of response
-            var stopwatch = Stopwatch.GetTimestamp();
-            await foreach (var fragment in modelSession.InferAsync(prompt, CancellationTokenSource.CreateLinkedTokenSource(Context.ConnectionAborted)))
-            {
-                await Clients.Caller.OnResponse(new ResponseFragment(responseId, fragment));
-            }
-
-            // Send end of response
-            var elapsedTime = Stopwatch.GetElapsedTime(stopwatch);
-            var signature = modelSession.IsInferCanceled()
-                ? $"Inference cancelled after {elapsedTime.TotalSeconds:F0} seconds"
-                : $"Inference completed in {elapsedTime.TotalSeconds:F0} seconds";
-            await Clients.Caller.OnResponse(new ResponseFragment(responseId, signature, isLast: true));
-            _logger.Log(LogLevel.Information, "[OnSendPrompt] - Inference complete, Connection: {0}, Elapsed: {1}, Canceled: {2}", Context.ConnectionId, elapsedTime, modelSession.IsInferCanceled());
+            var linkedCancelationToken = CancellationTokenSource.CreateLinkedTokenSource(Context.ConnectionAborted, cancellationToken);
+            return _modelSessionService.InferAsync(Context.ConnectionId, prompt, inferConfig, linkedCancelationToken.Token);
         }
-
     }
 }
diff --git a/LLama.Web/LLama.Web.csproj b/LLama.Web/LLama.Web.csproj
index d0e15a62..5a46c5e8 100644
--- a/LLama.Web/LLama.Web.csproj
+++ b/LLama.Web/LLama.Web.csproj
@@ -14,4 +14,8 @@
     <Folder Include="wwwroot\image\" />
   </ItemGroup>
 
+  <ItemGroup>
+    <PackageReference Include="System.Linq.Async" Version="6.0.1" />
+  </ItemGroup>
+
 </Project>
diff --git a/LLama.Web/LLamaModel.cs b/LLama.Web/Models/LLamaModel.cs
similarity index 98%
rename from LLama.Web/LLamaModel.cs
rename to LLama.Web/Models/LLamaModel.cs
index e500ba04..71bb290e 100644
--- a/LLama.Web/LLamaModel.cs
+++ b/LLama.Web/Models/LLamaModel.cs
@@ -2,12 +2,12 @@
 using LLama.Web.Common;
 using System.Collections.Concurrent;
 
-namespace LLama.Web
+namespace LLama.Web.Models
 {
     /// <summary>
     /// Wrapper class for LLamaSharp LLamaWeights
     /// </summary>
-    /// <seealso cref="System.IDisposable" />
+    /// <seealso cref="IDisposable" />
     public class LLamaModel : IDisposable
     {
         private readonly ModelOptions _config;
diff --git a/LLama.Web/Models/ModelSession.cs b/LLama.Web/Models/ModelSession.cs
index c53676f2..35413f92 100644
--- a/LLama.Web/Models/ModelSession.cs
+++ b/LLama.Web/Models/ModelSession.cs
@@ -3,46 +3,97 @@ using LLama.Web.Common;
 
 namespace LLama.Web.Models
 {
-    public class ModelSession : IDisposable
+    public class ModelSession
     {
-        private bool _isFirstInteraction = true;
-        private ModelOptions _modelOptions;
-        private PromptOptions _promptOptions;
-        private ParameterOptions _inferenceOptions;
-        private ITextStreamTransform _outputTransform;
-        private ILLamaExecutor _executor;
+        private readonly string _sessionId;
+        private readonly LLamaModel _model;
+        private readonly LLamaContext _context;
+        private readonly ILLamaExecutor _executor;
+        private readonly Common.SessionOptions _sessionParams;
+        private readonly ITextStreamTransform _outputTransform;
+        private readonly InferenceOptions _defaultInferenceConfig;
+
         private CancellationTokenSource _cancellationTokenSource;
 
-        public ModelSession(ILLamaExecutor executor, ModelOptions modelOptions, PromptOptions promptOptions, ParameterOptions parameterOptions)
+        public ModelSession(LLamaModel model, LLamaContext context, string sessionId, Common.SessionOptions sessionOptions, InferenceOptions inferenceOptions = null)
         {
-            _executor = executor;
-            _modelOptions = modelOptions;
-            _promptOptions = promptOptions;
-            _inferenceOptions = parameterOptions;
-            
-            _inferenceOptions.AntiPrompts = _promptOptions.AntiPrompt?.Concat(_inferenceOptions.AntiPrompts ?? Enumerable.Empty<string>()).Distinct() ?? _inferenceOptions.AntiPrompts;
-            if (_promptOptions.OutputFilter?.Count > 0)
-                _outputTransform = new LLamaTransforms.KeywordTextOutputStreamTransform(_promptOptions.OutputFilter, redundancyLength: 5);
+            _model = model;
+            _context = context;
+            _sessionId = sessionId;
+            _sessionParams = sessionOptions;
+            _defaultInferenceConfig = inferenceOptions ?? new InferenceOptions();
+            _outputTransform = CreateOutputFilter(_sessionParams);
+            _executor = CreateExecutor(_model, _context, _sessionParams);
         }
 
-        public string ModelName
+        /// <summary>
+        /// Gets the session identifier.
+        /// </summary>
+        public string SessionId => _sessionId;
+
+        /// <summary>
+        /// Gets the name of the model.
+        /// </summary>
+        public string ModelName => _sessionParams.Model;
+
+        /// <summary>
+        /// Gets the context.
+        /// </summary>
+        public LLamaContext Context => _context;
+
+        /// <summary>
+        /// Gets the session configuration.
+        /// </summary>
+        public Common.SessionOptions SessionConfig => _sessionParams;
+
+        /// <summary>
+        /// Gets the inference parameters.
+        /// </summary>
+        public InferenceOptions InferenceParams => _defaultInferenceConfig;
+
+
+
+        /// <summary>
+        /// Initializes the prompt.
+        /// </summary>
+        /// <param name="inferenceConfig">The inference configuration.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        internal async Task InitializePrompt(InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default)
         {
-            get { return _modelOptions.Name; }
+            if (_sessionParams.ExecutorType == LLamaExecutorType.Stateless)
+                return;
+
+            if (string.IsNullOrEmpty(_sessionParams.Prompt))
+                return;
+
+            // Run Initial prompt
+            var inferenceParams = ConfigureInferenceParams(inferenceConfig);
+            _cancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
+            await foreach (var _ in _executor.InferAsync(_sessionParams.Prompt, inferenceParams, _cancellationTokenSource.Token))
+            {
+                // We dont really need the response of the initial prompt, so exit on first token
+                break;
+            };
         }
 
-        public IAsyncEnumerable<string> InferAsync(string message, CancellationTokenSource cancellationTokenSource)
+
+        /// <summary>
+        /// Runs inference on the model context
+        /// </summary>
+        /// <param name="message">The message.</param>
+        /// <param name="inferenceConfig">The inference configuration.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        /// <returns></returns>
+        internal IAsyncEnumerable<string> InferAsync(string message, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default)
         {
-            _cancellationTokenSource = cancellationTokenSource;
-            if (_isFirstInteraction)
-            {
-                _isFirstInteraction = false;
-                message = _promptOptions.Prompt + message;
-            }
+            var inferenceParams = ConfigureInferenceParams(inferenceConfig);
+            _cancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
 
+            var inferenceStream = _executor.InferAsync(message, inferenceParams, _cancellationTokenSource.Token);
             if (_outputTransform is not null)
-                return _outputTransform.TransformAsync(_executor.InferAsync(message, _inferenceOptions, _cancellationTokenSource.Token));
+                return _outputTransform.TransformAsync(inferenceStream);
 
-            return _executor.InferAsync(message, _inferenceOptions, _cancellationTokenSource.Token);
+            return inferenceStream;
         }
 
 
@@ -56,13 +107,36 @@ namespace LLama.Web.Models
             return _cancellationTokenSource.IsCancellationRequested;
         }
 
-        public void Dispose()
+        /// <summary>
+        /// Picks the options to run with (the supplied ones, else the session defaults) and assigns the session's anti-prompts to them.
+        /// </summary>
+        /// <param name="inferenceConfig">Per-call options, or null to use the session defaults. The chosen instance is modified in place.</param>
+        private IInferenceParams ConfigureInferenceParams(InferenceOptions inferenceConfig)
+        {
+            var selected = inferenceConfig is null ? _defaultInferenceConfig : inferenceConfig;
+            selected.AntiPrompts = _sessionParams.GetAntiPrompts();
+            return selected;
+        }
+
+        private ITextStreamTransform CreateOutputFilter(Common.SessionOptions sessionConfig)
         {
-            _inferenceOptions = null;
-            _outputTransform = null;
+            var outputFilters = sessionConfig.GetOutputFilters();
+            if (outputFilters.Count > 0)
+                return new LLamaTransforms.KeywordTextOutputStreamTransform(outputFilters);
 
-            _executor?.Context.Dispose();
-            _executor = null;
+            return null;
+        }
+
+        /// <summary>Creates the executor selected by the session's ExecutorType from the supplied model and context.</summary>
+        private ILLamaExecutor CreateExecutor(LLamaModel model, LLamaContext context, Common.SessionOptions sessionConfig)
+        {
+            return sessionConfig.ExecutorType switch
+            {
+                LLamaExecutorType.Interactive => new InteractiveExecutor(context),
+                LLamaExecutorType.Instruct => new InstructExecutor(context),
+                LLamaExecutorType.Stateless => new StatelessExecutor(model.LLamaWeights, model.ModelParams),
+                _ => default // NOTE(review): an unrecognised executor type yields a null executor; confirm callers handle that
+            };
         }
     }
 }
diff --git a/LLama.Web/Models/ResponseFragment.cs b/LLama.Web/Models/ResponseFragment.cs
deleted file mode 100644
index 02f27f13..00000000
--- a/LLama.Web/Models/ResponseFragment.cs
+++ /dev/null
@@ -1,18 +0,0 @@
-﻿namespace LLama.Web.Models
-{
-    public class ResponseFragment
-    {
-        public ResponseFragment(string id, string content = null, bool isFirst = false, bool isLast = false)
-        {
-            Id = id;
-            IsLast = isLast;
-            IsFirst = isFirst;
-            Content = content;
-        }
-
-        public string Id { get; set; }
-        public string Content { get; set; }
-        public bool IsLast { get; set; }
-        public bool IsFirst { get; set; }
-    }
-}
diff --git a/LLama.Web/Models/TokenModel.cs b/LLama.Web/Models/TokenModel.cs
new file mode 100644
index 00000000..c95f9ec6
--- /dev/null
+++ b/LLama.Web/Models/TokenModel.cs
@@ -0,0 +1,24 @@
+﻿namespace LLama.Web.Models
+{
+    public class TokenModel
+    {
+        // Element type of the stream returned by the hub's SendPrompt method.
+        // Content is optional and the token type defaults to TokenType.Content.
+        public TokenModel(string id, string content = null, TokenType tokenType = TokenType.Content)
+        {
+            (Id, Content, TokenType) = (id, content, tokenType);
+        }
+
+        public string Id { get; set; }
+        public string Content { get; set; }
+        public TokenType TokenType { get; set; }
+    }
+
+    public enum TokenType
+    {
+        Begin = 0,
+        Content = 2,
+        End = 4,
+        Cancel = 10 // NOTE(review): numeric values are non-contiguous; presumably mirrored by client-side code — confirm before renumbering
+    }
+}
diff --git a/LLama.Web/Pages/Executor/Instruct.cshtml b/LLama.Web/Pages/Executor/Instruct.cshtml
deleted file mode 100644
index 9f8cb2d8..00000000
--- a/LLama.Web/Pages/Executor/Instruct.cshtml
+++ /dev/null
@@ -1,96 +0,0 @@
-﻿@page
-@model InstructModel
-@{
-
-}
-@Html.AntiForgeryToken()
-<div class="d-flex flex-row h-100 pt-1 pb-1">
-
-    <div class="d-flex flex-column h-100 border me-1 w-25 overflow-auto">
-        <div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center">
-            <h4>Instruct</h4>
-            <div>
-                <span>Hub: <b id="socket">Disconnected</b></span>
-            </div>
-        </div>
-
-        <div class="m-1">
-            <small>Model</small>
-            <select id="Model" class="form-control  form-select input-control" required="required" autocomplete="off">
-                <option value="" disabled selected hidden>Please Select</option>
-                @foreach (var modelOption in Model.Options.Models)
-                {
-                    <option value="@modelOption.Name">@modelOption.Name</option>
-                }
-            </select>
-        </div>
-
-        <div class="m-1">
-            <small>Parameters</small>
-            <select id="Parameter" class="form-control  form-select input-control" required="required" autocomplete="off">
-                <option value="" disabled selected hidden>Please Select</option>
-                @foreach (var parameterOption in Model.Options.Parameters)
-                {
-                    <option value="@parameterOption.Name">@parameterOption.Name</option>
-                }
-            </select>
-        </div>
-
-        <div class="m-1">
-            <small>Prompt</small>
-            <select id="Prompt" class="form-control  form-select input-control" required="required" autocomplete="off">
-                <option value="" disabled selected hidden>Please Select</option>
-                @foreach (var promptOption in Model.Options.Prompts)
-                {
-                    <option value="@promptOption.Name" data-prompt="@promptOption.Prompt">@promptOption.Name</option>
-                }
-            </select>
-            <textarea id="PromptText" class="form-control mt-1" rows="12" disabled="disabled" style="font-size:13px;resize:none"></textarea>
-        </div>
-
-        <div class="d-flex flex-grow-1"></div>
-        <div id="session-details" class="m-1"></div>
-        <div class="m-1">
-            <button class="btn btn-outline-secondary input-control w-100" type="button" id="load">Create Session</button>
-        </div>
-    </div>
-
-    <div class="d-flex flex-column h-100 w-75">
-        <div class="section-head">
-        </div>
-
-        <div id="scroll-container" class="section-content border">
-            <div id="output-container" class="d-flex flex-column gap-1 p-1">
-            </div>
-        </div>
-
-        <div class="section-foot">
-            <div class="input-group mt-2">
-                <textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea>
-                <div class="d-flex flex-column">
-                    <div class="d-flex flex-fill">
-                        <button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button>
-                    </div>
-                    <div class="d-flex">
-                        <button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off">
-                            <i class="bi-x-circle"></i>
-                        </button>
-                        <button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off">
-                            <i class="bi-trash3"></i>
-                        </button>
-                    </div>
-                </div>
-            </div>
-        </div>
-
-    </div>
-</div>
-
-@{ await Html.RenderPartialAsync("_ChatTemplates"); }
-
-@section Scripts {
-    <script src="~/js/sessionconnectionchat.js"></script>
-    <script>
-        createConnectionSessionChat(Enums.LLamaExecutorType.Instruct);
-    </script>
-}
\ No newline at end of file
diff --git a/LLama.Web/Pages/Executor/Instruct.cshtml.cs b/LLama.Web/Pages/Executor/Instruct.cshtml.cs
deleted file mode 100644
index 18a58253..00000000
--- a/LLama.Web/Pages/Executor/Instruct.cshtml.cs
+++ /dev/null
@@ -1,34 +0,0 @@
-﻿using LLama.Web.Common;
-using LLama.Web.Models;
-using LLama.Web.Services;
-using Microsoft.AspNetCore.Mvc;
-using Microsoft.AspNetCore.Mvc.RazorPages;
-using Microsoft.Extensions.Options;
-
-namespace LLama.Web.Pages
-{
-    public class InstructModel : PageModel
-    {
-        private readonly ILogger<InstructModel> _logger;
-        private readonly ConnectionSessionService _modelSessionService;
-
-        public InstructModel(ILogger<InstructModel> logger, IOptions<LLamaOptions> options, ConnectionSessionService modelSessionService)
-        {
-            _logger = logger;
-            Options = options.Value;
-            _modelSessionService = modelSessionService;
-        }
-
-        public LLamaOptions Options { get; set; }
-
-        public void OnGet()
-        {
-        }
-
-        public async Task<IActionResult> OnPostCancel(CancelModel model)
-        {
-            await _modelSessionService.CancelAsync(model.ConnectionId);
-            return new JsonResult(default);
-        }
-    }
-}
\ No newline at end of file
diff --git a/LLama.Web/Pages/Executor/Instruct.cshtml.css b/LLama.Web/Pages/Executor/Instruct.cshtml.css
deleted file mode 100644
index ed9a1d59..00000000
--- a/LLama.Web/Pages/Executor/Instruct.cshtml.css
+++ /dev/null
@@ -1,4 +0,0 @@
-﻿.section-content {
-    flex: 1;
-    overflow-y: scroll;
-}
diff --git a/LLama.Web/Pages/Executor/Interactive.cshtml b/LLama.Web/Pages/Executor/Interactive.cshtml
deleted file mode 100644
index 916b59ca..00000000
--- a/LLama.Web/Pages/Executor/Interactive.cshtml
+++ /dev/null
@@ -1,96 +0,0 @@
-﻿@page
-@model InteractiveModel
-@{
-
-}
-@Html.AntiForgeryToken()
-<div class="d-flex flex-row h-100 pt-1 pb-1">
-
-    <div class="d-flex flex-column h-100 border me-1 w-25 overflow-auto">
-        <div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center">
-            <h4>Interactive</h4>
-            <div>
-                <span>Hub: <b id="socket">Disconnected</b></span>
-            </div>
-        </div>
-
-        <div class="m-1">
-            <small>Model</small>
-            <select id="Model" class="form-control  form-select input-control" required="required" autocomplete="off">
-                <option value="" disabled selected hidden>Please Select</option>
-                @foreach (var modelOption in Model.Options.Models)
-                {
-                    <option value="@modelOption.Name">@modelOption.Name</option>
-                }
-            </select>
-        </div>
-
-        <div class="m-1">
-            <small>Parameters</small>
-            <select id="Parameter" class="form-control  form-select input-control" required="required" autocomplete="off">
-                <option value="" disabled selected hidden>Please Select</option>
-                @foreach (var parameterOption in Model.Options.Parameters)
-                {
-                    <option value="@parameterOption.Name">@parameterOption.Name</option>
-                }
-            </select>
-        </div>
-
-        <div class="m-1">
-            <small>Prompt</small>
-            <select id="Prompt" class="form-control  form-select input-control" required="required" autocomplete="off">
-                <option value="" disabled selected hidden>Please Select</option>
-                @foreach (var promptOption in Model.Options.Prompts)
-                {
-                    <option value="@promptOption.Name" data-prompt="@promptOption.Prompt">@promptOption.Name</option>
-                }
-            </select>
-            <textarea id="PromptText" class="form-control mt-1" rows="12" disabled="disabled" style="font-size:13px;resize:none"></textarea>
-        </div>
-
-        <div class="d-flex flex-grow-1"></div>
-        <div id="session-details" class="m-1"></div>
-        <div class="m-1">
-            <button class="btn btn-outline-secondary input-control w-100" type="button" id="load">Create Session</button>
-        </div>
-    </div>
-
-    <div class="d-flex flex-column h-100 w-75">
-        <div class="section-head">
-        </div>
-
-        <div id="scroll-container" class="section-content border">
-            <div id="output-container" class="d-flex flex-column gap-1 p-1">
-            </div>
-        </div>
-
-        <div class="section-foot">
-            <div class="input-group mt-2">
-                <textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea>
-                <div class="d-flex flex-column">
-                    <div class="d-flex flex-fill">
-                        <button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button>
-                    </div>
-                    <div class="d-flex">
-                        <button class="btn btn-outline-secondary w-100" type="button" id="cancel"  autocomplete="off">
-                            <i class="bi-x-circle"></i>
-                        </button>
-                        <button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off">
-                            <i class="bi-trash3"></i>
-                        </button>
-                    </div>
-                </div>
-            </div>
-        </div>
-
-    </div>
-</div>
-
-@{ await Html.RenderPartialAsync("_ChatTemplates");}
-
-@section Scripts {
-    <script src="~/js/sessionconnectionchat.js"></script>
-    <script>
-        createConnectionSessionChat(Enums.LLamaExecutorType.Interactive);
-    </script>
-}
\ No newline at end of file
diff --git a/LLama.Web/Pages/Executor/Interactive.cshtml.cs b/LLama.Web/Pages/Executor/Interactive.cshtml.cs
deleted file mode 100644
index 7179a440..00000000
--- a/LLama.Web/Pages/Executor/Interactive.cshtml.cs
+++ /dev/null
@@ -1,34 +0,0 @@
-﻿using LLama.Web.Common;
-using LLama.Web.Models;
-using LLama.Web.Services;
-using Microsoft.AspNetCore.Mvc;
-using Microsoft.AspNetCore.Mvc.RazorPages;
-using Microsoft.Extensions.Options;
-
-namespace LLama.Web.Pages
-{
-    public class InteractiveModel : PageModel
-    {
-        private readonly ILogger<InteractiveModel> _logger;
-        private readonly ConnectionSessionService _modelSessionService;
-
-        public InteractiveModel(ILogger<InteractiveModel> logger, IOptions<LLamaOptions> options, ConnectionSessionService modelSessionService)
-        {
-            _logger = logger;
-            Options = options.Value;
-            _modelSessionService = modelSessionService;
-        }
-
-        public LLamaOptions Options { get; set; }
-
-        public void OnGet()
-        {
-        }
-
-        public async Task<IActionResult> OnPostCancel(CancelModel model)
-        {
-            await _modelSessionService.CancelAsync(model.ConnectionId);
-            return new JsonResult(default);
-        }
-    }
-}
\ No newline at end of file
diff --git a/LLama.Web/Pages/Executor/Interactive.cshtml.css b/LLama.Web/Pages/Executor/Interactive.cshtml.css
deleted file mode 100644
index ed9a1d59..00000000
--- a/LLama.Web/Pages/Executor/Interactive.cshtml.css
+++ /dev/null
@@ -1,4 +0,0 @@
-﻿.section-content {
-    flex: 1;
-    overflow-y: scroll;
-}
diff --git a/LLama.Web/Pages/Executor/Stateless.cshtml b/LLama.Web/Pages/Executor/Stateless.cshtml
deleted file mode 100644
index b5d8eea3..00000000
--- a/LLama.Web/Pages/Executor/Stateless.cshtml
+++ /dev/null
@@ -1,97 +0,0 @@
-﻿@page
-@model StatelessModel
-@{
-
-}
-@Html.AntiForgeryToken()
-<div class="d-flex flex-row h-100 pt-1 pb-1">
-
-    <div class="d-flex flex-column h-100 border me-1 w-25 overflow-auto">
-        <div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center">
-            <h4>Stateless</h4>
-            <div>
-                <span>Hub: <b id="socket">Disconnected</b></span>
-            </div>
-        </div>
-
-        <div class="m-1">
-            <small>Model</small>
-            <select id="Model" class="form-control  form-select input-control" required="required" autocomplete="off">
-                <option value="" disabled selected hidden>Please Select</option>
-                @foreach (var modelOption in Model.Options.Models)
-                {
-                    <option value="@modelOption.Name">@modelOption.Name</option>
-                }
-            </select>
-        </div>
-
-        <div class="m-1">
-            <small>Parameters</small>
-            <select id="Parameter" class="form-control  form-select input-control" required="required" autocomplete="off">
-                <option value="" disabled selected hidden>Please Select</option>
-                @foreach (var parameterOption in Model.Options.Parameters)
-                {
-                    <option value="@parameterOption.Name">@parameterOption.Name</option>
-                }
-            </select>
-        </div>
-
-        <div class="m-1">
-            <small>Prompt</small>
-            <select id="Prompt" class="form-control  form-select input-control" required="required" autocomplete="off">
-                <option value="" disabled selected hidden>Please Select</option>
-                @foreach (var promptOption in Model.Options.Prompts)
-                {
-                    <option value="@promptOption.Name" data-prompt="@promptOption.Prompt">@promptOption.Name</option>
-                }
-            </select>
-            <textarea id="PromptText" class="form-control mt-1" rows="12" disabled="disabled" style="font-size:13px;resize:none"></textarea>
-        </div>
-
-        <div class="d-flex flex-grow-1"></div>
-        <div id="session-details" class="m-1"></div>
-        <div class="m-1">
-            <button class="btn btn-outline-secondary input-control w-100" type="button" id="load">Create Session</button>
-        </div>
-    </div>
-
-    <div class="d-flex flex-column h-100 w-75">
-        <div class="section-head">
-        </div>
-
-        <div id="scroll-container" class="section-content border">
-            <div id="output-container" class="d-flex flex-column gap-1 p-1">
-            </div>
-        </div>
-
-        <div class="section-foot">
-            <div class="input-group mt-2">
-                <textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea>
-                <div class="d-flex flex-column">
-                    <div class="d-flex flex-fill">
-                        <button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button>
-                    </div>
-                    <div class="d-flex">
-                        <button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off">
-                            <i class="bi-x-circle"></i>
-                        </button>
-                        <button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off">
-                            <i class="bi-trash3"></i>
-                        </button>
-                    </div>
-                </div>
-            </div>
-        </div>
-
-    </div>
-</div>
-
-@{ await Html.RenderPartialAsync("_ChatTemplates"); }
-
-
-@section Scripts {
-    <script src="~/js/sessionconnectionchat.js"></script>
-    <script>
-        createConnectionSessionChat(Enums.LLamaExecutorType.Stateless);
-    </script>
-}
\ No newline at end of file
diff --git a/LLama.Web/Pages/Executor/Stateless.cshtml.cs b/LLama.Web/Pages/Executor/Stateless.cshtml.cs
deleted file mode 100644
index f88c4b83..00000000
--- a/LLama.Web/Pages/Executor/Stateless.cshtml.cs
+++ /dev/null
@@ -1,34 +0,0 @@
-﻿using LLama.Web.Common;
-using LLama.Web.Models;
-using LLama.Web.Services;
-using Microsoft.AspNetCore.Mvc;
-using Microsoft.AspNetCore.Mvc.RazorPages;
-using Microsoft.Extensions.Options;
-
-namespace LLama.Web.Pages
-{
-    public class StatelessModel : PageModel
-    {
-        private readonly ILogger<StatelessModel> _logger;
-        private readonly ConnectionSessionService _modelSessionService;
-
-        public StatelessModel(ILogger<StatelessModel> logger, IOptions<LLamaOptions> options, ConnectionSessionService modelSessionService)
-        {
-            _logger = logger;
-            Options = options.Value;
-            _modelSessionService = modelSessionService;
-        }
-
-        public LLamaOptions Options { get; set; }
-
-        public void OnGet()
-        {
-        }
-
-        public async Task<IActionResult> OnPostCancel(CancelModel model)
-        {
-            await _modelSessionService.CancelAsync(model.ConnectionId);
-            return new JsonResult(default);
-        }
-    }
-}
\ No newline at end of file
diff --git a/LLama.Web/Pages/Executor/Stateless.cshtml.css b/LLama.Web/Pages/Executor/Stateless.cshtml.css
deleted file mode 100644
index ed9a1d59..00000000
--- a/LLama.Web/Pages/Executor/Stateless.cshtml.css
+++ /dev/null
@@ -1,4 +0,0 @@
-﻿.section-content {
-    flex: 1;
-    overflow-y: scroll;
-}
diff --git a/LLama.Web/Pages/Index.cshtml b/LLama.Web/Pages/Index.cshtml
index b5f0c15f..55512603 100644
--- a/LLama.Web/Pages/Index.cshtml
+++ b/LLama.Web/Pages/Index.cshtml
@@ -1,10 +1,121 @@
 ﻿@page
+@using LLama.Web.Common;
+
 @model IndexModel
 @{
-    ViewData["Title"] = "Home page";
+    ViewData["Title"] = "Inference Demo";
 }
 
-<div class="text-center">
-    <h1 class="display-4">Welcome</h1>
-    <p>Learn about <a href="https://docs.microsoft.com/aspnet/core">building Web apps with ASP.NET Core</a>.</p>
+@Html.AntiForgeryToken()
+<div class="d-flex flex-row h-100 pt-1 pb-1">
+
+    <div class="d-flex flex-column h-100 border me-1 w-25">
+        <div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center">
+            <div>
+                <span>@ViewData["Title"]</span>
+            </div>
+            <div>
+                <small>Socket: <b id="socket">Disconnected</b></small>
+            </div>
+        </div>
+
+        <div class="d-flex flex-column overflow-auto">
+            <form id="SessionParameters">
+                <div class="d-flex flex-column m-1">
+                    <div class="d-flex flex-column mb-2">
+                        <small>Model</small>
+                        @Html.DropDownListFor(m => m.SessionOptions.Model, new SelectList(Model.Options.Models, "Name", "Name"), new {  @class = "form-control prompt-control" ,required="required", autocomplete="off"})
+                    </div>
+                    <div class="d-flex flex-column mb-2">
+                        <small>Inference Type</small>
+                        @Html.DropDownListFor(m => m.SessionOptions.ExecutorType, Html.GetEnumSelectList<LLamaExecutorType>(), new {  @class = "form-control prompt-control" ,required="required", autocomplete="off"})
+                    </div>
+                    <nav>
+                        <div class="nav nav-tabs" id="nav-tab" role="tablist">
+                            <button class="nav-link active w-50" id="nav-prompt-tab" data-bs-toggle="tab" data-bs-target="#nav-prompt" type="button" role="tab">Prompt</button>
+                            <button class="nav-link w-50" id="nav-params-tab" data-bs-toggle="tab" data-bs-target="#nav-params" type="button" role="tab">Parameters</button>
+                        </div>
+                    </nav>
+                    <div class="tab-content" id="nav-tabContent">
+                        <div class="tab-pane fade show active" id="nav-prompt" role="tabpanel" aria-labelledby="nav-prompt-tab">
+                            <div class="d-flex flex-column mb-2">
+                                <small>Prompt</small>
+                                @Html.TextAreaFor(m => m.SessionOptions.Prompt, new { @class = "form-control prompt-control", rows=8})
+                            </div>
+
+                            <div class="d-flex flex-column mb-2">
+                                <small>AntiPrompts</small>
+                                @Html.TextBoxFor(m => m.SessionOptions.AntiPrompt, new { @type="text", @class = "form-control prompt-control"})
+                            </div>
+
+                            <div class="d-flex flex-column mb-2">
+                                <small>OutputFilter</small>
+                                @Html.TextBoxFor(m => m.SessionOptions.OutputFilter, new { @type="text", @class = "form-control prompt-control"})
+                            </div>
+                        </div>
+                        <div class="tab-pane fade" id="nav-params" role="tabpanel" aria-labelledby="nav-params-tab">
+                            @{
+                                await Html.RenderPartialAsync("_Parameters", Model.InferenceOptions);
+                            }
+                        </div>
+                    </div>
+                </div>
+            </form>
+        </div>
+
+        <div class="d-flex flex-grow-1"></div>
+        <div id="session-details" class="m-1"></div>
+        <div class="m-1">
+            <button class="btn btn-outline-success w-100" type="button" id="load">
+
+                <div class="d-flex align-items-center justify-content-center">
+                    <img class="spinner me-2" style="display:none" src="~/image/loading.gif" width="20" alt="" />
+                    Begin Session
+                </div>
+
+            </button>
+            <button class="btn btn-outline-danger w-100" type="button" id="unload" style="display:none">End Session</button>
+        </div>
+    </div>
+
+    <div class="d-flex flex-column h-100 w-75">
+        <div class="section-head">
+        </div>
+
+        <div id="scroll-container" class="section-content border">
+            <div id="output-container" class="d-flex flex-column gap-1 p-1">
+            </div>
+        </div>
+
+        <div class="section-foot">
+            <div class="input-group mt-2">
+                <textarea id="input" class="form-control" style="resize:none" rows="4">What is an apple?</textarea>
+                <div class="d-flex flex-column">
+                    <div class="d-flex flex-fill">
+                        <button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button>
+                    </div>
+                    <div class="d-flex">
+                        <button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off">
+                            <i class="bi-x-circle"></i>
+                        </button>
+                        <button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off">
+                            <i class="bi-trash3"></i>
+                        </button>
+                    </div>
+                </div>
+            </div>
+        </div>
+
+    </div>
 </div>
+
+@{
+    await Html.RenderPartialAsync("_ChatTemplates");
+}
+
+@section Scripts {
+    <script src="~/js/sessionconnectionchat.js"></script>
+    <script>
+        createConnectionSessionChat();
+    </script>
+}
\ No newline at end of file
diff --git a/LLama.Web/Pages/Index.cshtml.cs b/LLama.Web/Pages/Index.cshtml.cs
index 477c9bfb..3647dfec 100644
--- a/LLama.Web/Pages/Index.cshtml.cs
+++ b/LLama.Web/Pages/Index.cshtml.cs
@@ -1,5 +1,7 @@
-﻿using Microsoft.AspNetCore.Mvc;
+﻿using LLama.Web.Common;
+using Microsoft.AspNetCore.Mvc;
 using Microsoft.AspNetCore.Mvc.RazorPages;
+using Microsoft.Extensions.Options;
 
 namespace LLama.Web.Pages
 {
@@ -7,14 +9,33 @@ namespace LLama.Web.Pages
     {
         private readonly ILogger<IndexModel> _logger;
 
-        public IndexModel(ILogger<IndexModel> logger)
+        public IndexModel(ILogger<IndexModel> logger, IOptions<LLamaOptions> options)
         {
             _logger = logger;
+            Options = options.Value;
         }
 
+        public LLamaOptions Options { get; set; }
+
+        [BindProperty]
+        public Common.SessionOptions SessionOptions { get; set; }
+
+        [BindProperty]
+        public InferenceOptions InferenceOptions { get; set; }
+
         public void OnGet()
         {
+            SessionOptions = new Common.SessionOptions
+            {
+                Prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
+                AntiPrompt = "User:",
+                // OutputFilter = "User:, Response:"
+            };
 
+            InferenceOptions = new InferenceOptions
+            {
+                Temperature = 0.8f
+            };
         }
     }
 }
\ No newline at end of file
diff --git a/LLama.Web/Pages/Shared/_ChatTemplates.cshtml b/LLama.Web/Pages/Shared/_ChatTemplates.cshtml
index 15644012..cd768f1f 100644
--- a/LLama.Web/Pages/Shared/_ChatTemplates.cshtml
+++ b/LLama.Web/Pages/Shared/_ChatTemplates.cshtml
@@ -12,7 +12,7 @@
              <img src="~/image/human.png" width="60"/>
          </div>
         <div class="d-flex flex-column flex-fill justify-content-between">
-            <span class="w-100" style="resize:none" >{{text}}</span>
+            <span class="content" style="resize:none" >{{text}}</span>
              <div class="d-flex justify-content-end">
                  <i>{{date}}</i>
             </div>
@@ -26,9 +26,7 @@
             <img src="~/image/robot.png" width="60"/>
         </div>
         <div id="{{id}}" class="d-flex flex-column flex-fill justify-content-between">
-            <span class="content">
-                <img src="~/image/loading.gif" width="30" />
-            </span>
+            <span class="content"><img src="~/image/loading.gif" width="30" /></span>
             <div class="d-flex justify-content-end">
                 <div class="d-flex flex-column align-items-end">
                     <i class="date"></i>
@@ -41,20 +39,6 @@
     </div>
 </script>
 
-<script id="sessionDetailsTemplate" type="text/html">
-    <div>
-        <small>Session Details </small>
-    </div>
-    <div>
-        <i>Model: </i>
-         <span>{{model}}</span>
-    </div>
-    <div>
-        <i>Prompt: </i>
-         <span>{{prompt}}</span>
-    </div>
-    <div>
-        <i>Parameters: </i>
-        <span>{{parameter}}</span>
-    </div>
+<script id="signatureTemplate" type="text/html">
+    <span>{{content}}</span>
 </script>
\ No newline at end of file
diff --git a/LLama.Web/Pages/Shared/_Layout.cshtml b/LLama.Web/Pages/Shared/_Layout.cshtml
index 23132bfa..16d6ad52 100644
--- a/LLama.Web/Pages/Shared/_Layout.cshtml
+++ b/LLama.Web/Pages/Shared/_Layout.cshtml
@@ -3,7 +3,7 @@
 <head>
     <meta charset="utf-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>@ViewData["Title"] - LLama.Web</title>
+    <title>@ViewData["Title"] - LLamaSharp.Web</title>
     <link rel="stylesheet" href="~/lib/bootstrap/dist/css/bootstrap.min.css" />
     <link href="~/lib/bootstrap/dist/css/bootstrap-icons.css" rel="stylesheet" />
     <link rel="stylesheet" href="~/css/site.css" asp-append-version="true" />
@@ -13,24 +13,26 @@
     <header>
         <nav class="navbar navbar-expand-sm navbar-toggleable-sm navbar-light bg-white border-bottom box-shadow ">
             <div class="container">
-                <a class="navbar-brand" asp-area="" asp-page="/Index">LLama.Web</a>
+                <a class="navbar-brand" asp-area="" asp-page="/Index">LLamaSharp.Web</a>
                 <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target=".navbar-collapse" aria-controls="navbarSupportedContent"
                         aria-expanded="false" aria-label="Toggle navigation">
                     <span class="navbar-toggler-icon"></span>
                 </button>
                 <div class="navbar-collapse collapse d-sm-inline-flex justify-content-between">
-                    <ul class="navbar-nav flex-grow-1">
+                    <ul class="navbar-nav flex-grow-1 justify-content-between">
                         <li class="nav-item">
-                            <a class="nav-link text-dark" asp-area="" asp-page="/Index">Home</a>
+                            <a class="nav-link text-dark" asp-page="/Index"></a>
                         </li>
                         <li class="nav-item">
-                            <a class="nav-link text-dark" asp-area="" asp-page="/Executor/Interactive">Interactive</a>
-                        </li>
-                        <li class="nav-item">
-                            <a class="nav-link text-dark" asp-area="" asp-page="/Executor/Instruct">Instruct</a>
-                        </li>
-                        <li class="nav-item">
-                            <a class="nav-link text-dark" asp-area="" asp-page="/Executor/Stateless">Stateless</a>
+                            <a class="nav-link text-dark" href="https://github.com/SciSharp/LLamaSharp" target="_blank">
+
+                                <div class="d-flex flex-row align-items-center">
+                                    <h5 class="mb-0">
+                                        <i class="bi bi-github"></i>
+                                        <span>LLamaSharp</span>
+                                    </h5>
+                                </div>
+                            </a>
                         </li>
                     </ul>
                 </div>
@@ -38,14 +40,14 @@
         </nav>
     </header>
 
-        <main class="container" role="main" >
-            @RenderBody()
-        </main>
+    <main class="container" role="main">
+        @RenderBody()
+    </main>
 
 
     <footer class="border-top footer text-muted">
         <div class="container">
-            &copy; 2023 - LLama.Web
+            &copy; 2023 - LLamaSharp.Web
         </div>
     </footer>
 
diff --git a/LLama.Web/Pages/Shared/_Parameters.cshtml b/LLama.Web/Pages/Shared/_Parameters.cshtml
new file mode 100644
index 00000000..d6e476c4
--- /dev/null
+++ b/LLama.Web/Pages/Shared/_Parameters.cshtml
@@ -0,0 +1,137 @@
+﻿@* Partial view rendered through Html.RenderPartialAsync with an IInferenceParams model; partial views take no page directive. *@
+@using LLama.Common;
+@model LLama.Abstractions.IInferenceParams
+
+
+<div class="d-flex flex-row gap-3">
+    <div class="d-flex flex-column mb-2">
+        <small>MaxTokens</small>
+        <div class="d-flex flex-row slider-container">
+            @Html.TextBoxFor(m => m.MaxTokens, new { @type="range", @class = "slider", min="-1", max="2048", step="1" })
+            <label>0</label>
+        </div>
+    </div>
+
+    <div class="d-flex flex-column mb-2">
+        <small>TokensKeep</small>
+        <div class="d-flex flex-row slider-container">
+            @Html.TextBoxFor(m => m.TokensKeep, new { @type="range", @class = "slider", min="0", max="2048", step="1" })
+            <label>0</label>
+        </div>
+    </div>
+</div>
+
+<div class="d-flex flex-row gap-3">
+    <div class="d-flex flex-column mb-2">
+        <small>TopK</small>
+        <div class="d-flex flex-row slider-container">
+            @Html.TextBoxFor(m => m.TopK, new { @type="range", @class = "slider", min="-1", max="100", step="1" })
+            <label>0</label>
+        </div>
+    </div>
+
+    <div class="d-flex flex-column mb-2">
+        <small>TopP</small>
+        <div class="d-flex flex-row slider-container">
+            @Html.TextBoxFor(m => m.TopP, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" })
+            <label>0</label>
+        </div>
+    </div>
+</div>
+
+
+
+<div class="d-flex flex-row gap-3">
+    <div class="d-flex flex-column mb-2">
+        <small>TypicalP</small>
+        <div class="d-flex flex-row slider-container">
+            @Html.TextBoxFor(m => m.TypicalP, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" })
+            <label>0</label>
+        </div>
+    </div>
+
+    <div class="d-flex flex-column mb-2">
+        <small>Temperature</small>
+        <div class="d-flex flex-row slider-container">
+            @Html.TextBoxFor(m => m.Temperature, new { @type="range", @class = "slider", min="0.0", max="1.5", step="0.01" })
+            <label>0</label>
+        </div>
+    </div>
+</div>
+
+<div class="d-flex flex-row gap-3">
+    <div class="d-flex flex-column mb-2">
+        <small>RepeatPenalty</small>
+        <div class="d-flex flex-row slider-container">
+            @Html.TextBoxFor(m => m.RepeatPenalty, new { @type="range", @class = "slider", min="0.0", max="2.0", step="0.01" })
+            <label>0</label>
+        </div>
+    </div>
+
+    <div class="d-flex flex-column mb-2">
+        <small>RepeatLastTokensCount</small>
+        <div class="d-flex flex-row slider-container">
+            @Html.TextBoxFor(m => m.RepeatLastTokensCount, new { @type="range", @class = "slider", min="0", max="2048", step="1" })
+            <label>0</label>
+        </div>
+    </div>
+</div>
+
+<div class="d-flex flex-row gap-3">
+    <div class="d-flex flex-column mb-2">
+        <small>FrequencyPenalty</small>
+        <div class="d-flex flex-row slider-container">
+            @Html.TextBoxFor(m => m.FrequencyPenalty, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" })
+            <label>0</label>
+        </div>
+    </div>
+
+    <div class="d-flex flex-column mb-2">
+        <small>PresencePenalty</small>
+        <div class="d-flex flex-row slider-container">
+            @Html.TextBoxFor(m => m.PresencePenalty, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" })
+            <label>0</label>
+        </div>
+    </div>
+</div>
+
+<div class="d-flex flex-row gap-3">
+    <div class="d-flex flex-column mb-2">
+        <small>TfsZ</small>
+        <div class="d-flex flex-row slider-container">
+            @Html.TextBoxFor(m => m.TfsZ, new { @type="range", @class = "slider",min="0.0", max="1.0", step="0.01" })
+            <label>0</label>
+        </div>
+    </div>
+    <div class="d-flex flex-column mb-2">
+        <small>-</small>
+        <div class="d-flex flex-row slider-container">
+            <input class="slider" type="range" value="0" disabled />
+            <label></label>
+        </div>
+    </div>
+</div>
+
+
+<div class="d-flex flex-column mb-2">
+    <small>Sampler Type</small>
+    @Html.DropDownListFor(m => m.Mirostat,  Html.GetEnumSelectList<MirostatType>(), new { @class = "form-control form-select" })
+</div>
+
+<div class="d-flex flex-row gap-3">
+    <div class="d-flex flex-column mb-2">
+        <small>MirostatTau</small>
+        <div class="d-flex flex-row slider-container">
+            @Html.TextBoxFor(m => m.MirostatTau, new { @type="range", @class = "slider", min="0.0", max="10.0", step="0.01" })
+            <label>0</label>
+        </div>
+    </div>
+
+    <div class="d-flex flex-column mb-2">
+        <small>MirostatEta</small>
+        <div class="d-flex flex-row slider-container">
+            @Html.TextBoxFor(m => m.MirostatEta, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" })
+            <label>0.0</label>
+        </div>
+    </div>
+</div>
\ No newline at end of file
diff --git a/LLama.Web/Program.cs b/LLama.Web/Program.cs
index 6db653a1..7c4583d2 100644
--- a/LLama.Web/Program.cs
+++ b/LLama.Web/Program.cs
@@ -1,6 +1,7 @@
 using LLama.Web.Common;
 using LLama.Web.Hubs;
 using LLama.Web.Services;
+using Microsoft.Extensions.DependencyInjection;
 
 namespace LLama.Web
 {
@@ -20,7 +21,9 @@ namespace LLama.Web
                 .BindConfiguration(nameof(LLamaOptions));
 
             // Services DI
-            builder.Services.AddSingleton<ConnectionSessionService>();
+            builder.Services.AddHostedService<ModelLoaderService>();
+            builder.Services.AddSingleton<IModelService, ModelService>();
+            builder.Services.AddSingleton<IModelSessionService, ModelSessionService>();
 
             var app = builder.Build();
 
diff --git a/LLama.Web/Services/ConnectionSessionService.cs b/LLama.Web/Services/ConnectionSessionService.cs
deleted file mode 100644
index 7dfcde39..00000000
--- a/LLama.Web/Services/ConnectionSessionService.cs
+++ /dev/null
@@ -1,94 +0,0 @@
-﻿using LLama.Abstractions;
-using LLama.Web.Common;
-using LLama.Web.Models;
-using Microsoft.Extensions.Options;
-using System.Collections.Concurrent;
-using System.Drawing;
-
-namespace LLama.Web.Services
-{
-    /// <summary>
-    /// Example Service for handling a model session for a websockets connection lifetime
-    /// Each websocket connection will create its own unique session and context allowing you to use multiple tabs to compare prompts etc
-    /// </summary>
-    public class ConnectionSessionService : IModelSessionService
-    {
-        private readonly LLamaOptions _options;
-        private readonly ILogger<ConnectionSessionService> _logger;
-        private readonly ConcurrentDictionary<string, ModelSession> _modelSessions;
-
-        public ConnectionSessionService(ILogger<ConnectionSessionService> logger, IOptions<LLamaOptions> options)
-        {
-            _logger = logger;
-            _options = options.Value;
-            _modelSessions = new ConcurrentDictionary<string, ModelSession>();
-        }
-
-        public Task<ModelSession> GetAsync(string connectionId)
-        {
-            _modelSessions.TryGetValue(connectionId, out var modelSession);
-            return Task.FromResult(modelSession);
-        }
-
-        public Task<IServiceResult<ModelSession>> CreateAsync(LLamaExecutorType executorType, string connectionId, string modelName, string promptName, string parameterName)
-        {
-            var modelOption = _options.Models.FirstOrDefault(x => x.Name == modelName);
-            if (modelOption is null)
-                return Task.FromResult(ServiceResult.FromError<ModelSession>($"Model option '{modelName}' not found"));
-
-            var promptOption = _options.Prompts.FirstOrDefault(x => x.Name == promptName);
-            if (promptOption is null)
-                return Task.FromResult(ServiceResult.FromError<ModelSession>($"Prompt option '{promptName}' not found"));
-
-            var parameterOption = _options.Parameters.FirstOrDefault(x => x.Name == parameterName);
-            if (parameterOption is null)
-                return Task.FromResult(ServiceResult.FromError<ModelSession>($"Parameter option '{parameterName}' not found"));
-
-
-            //Max instance
-            var currentInstances = _modelSessions.Count(x => x.Value.ModelName == modelOption.Name);
-            if (modelOption.MaxInstances > -1 && currentInstances >= modelOption.MaxInstances)
-                return Task.FromResult(ServiceResult.FromError<ModelSession>("Maximum model instances reached"));
-
-            // Create model
-            var llamaModel = new LLamaContext(modelOption);
-
-            // Create executor
-            ILLamaExecutor executor = executorType switch
-            {
-                LLamaExecutorType.Interactive => new InteractiveExecutor(llamaModel),
-                LLamaExecutorType.Instruct => new InstructExecutor(llamaModel),
-                LLamaExecutorType.Stateless => new StatelessExecutor(llamaModel),
-                _ => default
-            };
-
-            // Create session
-            var modelSession = new ModelSession(executor, modelOption, promptOption, parameterOption);
-            if (!_modelSessions.TryAdd(connectionId, modelSession))
-                return Task.FromResult(ServiceResult.FromError<ModelSession>("Failed to create model session"));
-
-            return Task.FromResult(ServiceResult.FromValue(modelSession));
-        }
-
-        public Task<bool> RemoveAsync(string connectionId)
-        {
-            if (_modelSessions.TryRemove(connectionId, out var modelSession))
-            {
-                modelSession.CancelInfer();
-                modelSession.Dispose();
-                return Task.FromResult(true);
-            }
-            return Task.FromResult(false);
-        }
-
-        public Task<bool> CancelAsync(string connectionId)
-        {
-            if (_modelSessions.TryGetValue(connectionId, out var modelSession))
-            {
-                modelSession.CancelInfer();
-                return Task.FromResult(true);
-            }
-            return Task.FromResult(false);
-        }
-    }
-}
diff --git a/LLama.Web/Services/IModelService.cs b/LLama.Web/Services/IModelService.cs
index 0a98f8f4..ec9e4233 100644
--- a/LLama.Web/Services/IModelService.cs
+++ b/LLama.Web/Services/IModelService.cs
@@ -1,4 +1,5 @@
 ﻿using LLama.Web.Common;
+using LLama.Web.Models;
 
 namespace LLama.Web.Services
 {
diff --git a/LLama.Web/Services/IModelSessionService.cs b/LLama.Web/Services/IModelSessionService.cs
index 4ee0d483..8723d795 100644
--- a/LLama.Web/Services/IModelSessionService.cs
+++ b/LLama.Web/Services/IModelSessionService.cs
@@ -1,16 +1,88 @@
-﻿using LLama.Abstractions;
-using LLama.Web.Common;
+﻿using LLama.Web.Common;
 using LLama.Web.Models;
 
 namespace LLama.Web.Services
 {
     public interface IModelSessionService
     {
+        /// <summary>
+        /// Gets the ModelSession with the specified Id.
+        /// </summary>
+        /// <param name="sessionId">The session identifier.</param>
+        /// <returns>The ModelSession if exists, otherwise null</returns>
         Task<ModelSession> GetAsync(string sessionId);
-        Task<IServiceResult<ModelSession>> CreateAsync(LLamaExecutorType executorType, string sessionId, string modelName, string promptName, string parameterName);
-        Task<bool> RemoveAsync(string sessionId);
-        Task<bool> CancelAsync(string sessionId);
-    }
 
 
+        /// <summary>
+        /// Gets all ModelSessions
+        /// </summary>
+        /// <returns>A collection of all ModelSession instances</returns>
+        Task<IEnumerable<ModelSession>> GetAllAsync();
+
+
+        /// <summary>
+        /// Creates a new ModelSession
+        /// </summary>
+        /// <param name="sessionId">The session identifier.</param>
+        /// <param name="sessionOptions">The session configuration.</param>
+        /// <param name="inferenceOptions">The default inference configuration, will be used for all inference where no infer configuration is supplied.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        /// <returns>The newly created ModelSession</returns>
+        /// <exception cref="System.Exception">
+        /// Session with id {sessionId} already exists
+        /// or
+        /// Failed to create model session
+        /// </exception>
+        Task<ModelSession> CreateAsync(string sessionId, Common.SessionOptions sessionOptions, InferenceOptions inferenceOptions = null, CancellationToken cancellationToken = default);
+
+
+        /// <summary>
+        /// Closes the session
+        /// </summary>
+        /// <param name="sessionId">The session identifier.</param>
+        /// <returns>True if the session was closed, otherwise false</returns>
+        Task<bool> CloseAsync(string sessionId);
+
+
+        /// <summary>
+        /// Runs inference on the current ModelSession
+        /// </summary>
+        /// <param name="sessionId">The session identifier.</param>
+        /// <param name="prompt">The prompt.</param>
+        /// <param name="inferenceConfig">The inference configuration, if null session default is used</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        /// <exception cref="System.Exception">Inference is already running for this session</exception>
+        IAsyncEnumerable<TokenModel> InferAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default);
+
+        /// <summary>
+        /// Runs inference on the current ModelSession
+        /// </summary>
+        /// <param name="sessionId">The session identifier.</param>
+        /// <param name="prompt">The prompt.</param>
+        /// <param name="inferenceOptions">The inference configuration, if null session default is used</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        /// <returns>Streaming async result of <see cref="System.String" /></returns>
+        /// <exception cref="System.Exception">Inference is already running for this session</exception>
+        IAsyncEnumerable<string> InferTextAsync(string sessionId, string prompt, InferenceOptions inferenceOptions = null, CancellationToken cancellationToken = default);
+
+
+        /// <summary>
+        /// Runs inference on the current ModelSession and returns the completed result
+        /// </summary>
+        /// <param name="sessionId">The session identifier.</param>
+        /// <param name="prompt">The prompt.</param>
+        /// <param name="inferenceOptions">The inference configuration, if null session default is used</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        /// <returns>Completed inference result as string</returns>
+        /// <exception cref="System.Exception">Inference is already running for this session</exception>
+        Task<string> InferTextCompleteAsync(string sessionId, string prompt, InferenceOptions inferenceOptions = null, CancellationToken cancellationToken = default);
+
+
+        /// <summary>
+        /// Cancels the current inference action.
+        /// </summary>
+        /// <param name="sessionId">The session identifier.</param>
+        /// <returns>True if the session exists and cancellation was requested, otherwise false</returns>
+        Task<bool> CancelAsync(string sessionId);
+    }
 }
diff --git a/LLama.Web/Services/ModelLoaderService.cs b/LLama.Web/Services/ModelLoaderService.cs
new file mode 100644
index 00000000..7545885d
--- /dev/null
+++ b/LLama.Web/Services/ModelLoaderService.cs
@@ -0,0 +1,42 @@
+﻿namespace LLama.Web.Services
+{
+
+    /// <summary>
+    /// Service for managing loading/preloading of models at app startup
+    /// </summary>
+    /// <remarks>Loads models through <see cref="IModelService"/> on start and unloads them on stop.</remarks>
+    /// <seealso cref="Microsoft.Extensions.Hosting.IHostedService" />
+    public class ModelLoaderService : IHostedService 
+    {
+        private readonly IModelService _modelService;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="ModelLoaderService"/> class.
+        /// </summary>
+        /// <param name="modelService">The model service.</param>
+        public ModelLoaderService(IModelService modelService)
+        {
+            _modelService = modelService;
+        }
+
+
+        /// <summary>
+        /// Triggered when the application host is ready to start the service.
+        /// </summary>
+        /// <param name="cancellationToken">Indicates that the start process has been aborted.</param>
+        public async Task StartAsync(CancellationToken cancellationToken)
+        {
+            await _modelService.LoadModels();
+        }
+
+
+        /// <summary>
+        /// Triggered when the application host is performing a graceful shutdown.
+        /// </summary>
+        /// <param name="cancellationToken">Indicates that the shutdown process should no longer be graceful.</param>
+        public async Task StopAsync(CancellationToken cancellationToken)
+        {
+            await _modelService.UnloadModels();
+        }
+    }
+}
diff --git a/LLama.Web/Services/ModelService.cs b/LLama.Web/Services/ModelService.cs
index 16365a5d..2a3d4788 100644
--- a/LLama.Web/Services/ModelService.cs
+++ b/LLama.Web/Services/ModelService.cs
@@ -1,5 +1,6 @@
 ﻿using LLama.Web.Async;
 using LLama.Web.Common;
+using LLama.Web.Models;
 using System.Collections.Concurrent;
 
 namespace LLama.Web.Services
diff --git a/LLama.Web/Services/ModelSessionService.cs b/LLama.Web/Services/ModelSessionService.cs
new file mode 100644
index 00000000..e808e630
--- /dev/null
+++ b/LLama.Web/Services/ModelSessionService.cs
@@ -0,0 +1,216 @@
+﻿using LLama.Web.Async;
+using LLama.Web.Common;
+using LLama.Web.Models;
+using System.Collections.Concurrent;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+namespace LLama.Web.Services
+{
+    /// <summary>
+    /// Example Service for handling a model session for a websockets connection lifetime
+    /// Each websocket connection will create its own unique session and context allowing you to use multiple tabs to compare prompts etc
+    /// </summary>
+    public class ModelSessionService : IModelSessionService
+    {
+        private readonly AsyncGuard<string> _sessionGuard;
+        private readonly IModelService _modelService;
+        private readonly ConcurrentDictionary<string, ModelSession> _modelSessions;
+
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="ModelSessionService"/> class.
+        /// </summary>
+        /// <param name="modelService">The model service.</param>
+        /// <remarks>Sessions are held in memory, keyed by session id.</remarks>
+        public ModelSessionService(IModelService modelService)
+        {
+            _modelService = modelService;
+            _sessionGuard = new AsyncGuard<string>();
+            _modelSessions = new ConcurrentDictionary<string, ModelSession>();
+        }
+
+
+        /// <summary>
+        /// Gets the ModelSession with the specified Id.
+        /// </summary>
+        /// <param name="sessionId">The session identifier.</param>
+        /// <returns>The ModelSession if exists, otherwise null</returns>
+        public Task<ModelSession> GetAsync(string sessionId)
+        {
+            return Task.FromResult(_modelSessions.TryGetValue(sessionId, out var session) ? session : null);
+        }
+
+
+        /// <summary>
+        /// Gets all ModelSessions
+        /// </summary>
+        /// <returns>A collection of all ModelSession instances</returns>
+        public Task<IEnumerable<ModelSession>> GetAllAsync()
+        {
+            return Task.FromResult<IEnumerable<ModelSession>>(_modelSessions.Values);
+        }
+
+
+        /// <summary>
+        /// Creates a new ModelSession
+        /// </summary>
+        /// <param name="sessionId">The session identifier.</param>
+        /// <param name="sessionConfig">The session configuration.</param>
+        /// <param name="inferenceConfig">The default inference configuration, will be used for all inference where no infer configuration is supplied.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        /// <returns>The newly created ModelSession</returns>
+        /// <exception cref="System.Exception">
+        /// Session with id {sessionId} already exists
+        /// or
+        /// Failed to create model session
+        /// </exception>
+        public async Task<ModelSession> CreateAsync(string sessionId, Common.SessionOptions sessionConfig, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default)
+        {
+            if (_modelSessions.TryGetValue(sessionId, out _))
+                throw new Exception($"Session with id {sessionId} already exists");
+
+            // Create context
+            var (model, context) = await _modelService.GetOrCreateModelAndContext(sessionConfig.Model, sessionId);
+
+            // Create session
+            var modelSession = new ModelSession(model, context, sessionId, sessionConfig, inferenceConfig);
+            if (!_modelSessions.TryAdd(sessionId, modelSession))
+                throw new Exception($"Failed to create model session");
+
+            // Run initial Prompt
+            await modelSession.InitializePrompt(inferenceConfig, cancellationToken);
+            return modelSession;
+
+        }
+
+
+        /// <summary>
+        /// Closes the session
+        /// </summary>
+        /// <param name="sessionId">The session identifier.</param>
+        /// <returns>The result of removing the session's context, or false if the session does not exist</returns>
+        public async Task<bool> CloseAsync(string sessionId)
+        {
+            if (_modelSessions.TryRemove(sessionId, out var modelSession))
+            {
+                modelSession.CancelInfer();
+                return await _modelService.RemoveContext(modelSession.ModelName, sessionId);
+            }
+            return false;
+        }
+
+
+        /// <summary>
+        /// Runs inference on the current ModelSession
+        /// </summary>
+        /// <param name="sessionId">The session identifier.</param>
+        /// <param name="prompt">The prompt.</param>
+        /// <param name="inferenceConfig">The inference configuration, if null session default is used</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        /// <exception cref="System.Exception">Inference is already running for this session</exception>
+        public async IAsyncEnumerable<TokenModel> InferAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+        {
+            if (!_sessionGuard.Guard(sessionId))
+                throw new Exception($"Inference is already running for this session");
+
+            try
+            {
+                if (!_modelSessions.TryGetValue(sessionId, out var modelSession))
+                    yield break;
+
+                // Send begin of response
+                var stopwatch = Stopwatch.GetTimestamp();
+                yield return new TokenModel(default, default, TokenType.Begin);
+
+                // Send content of response
+                await foreach (var token in modelSession.InferAsync(prompt, inferenceConfig, cancellationToken).ConfigureAwait(false))
+                {
+                    yield return new TokenModel(default, token);
+                }
+
+                // Send end of response
+                var elapsedTime = GetElapsed(stopwatch);
+                var endTokenType = modelSession.IsInferCanceled() ? TokenType.Cancel : TokenType.End;
+                var signature = endTokenType == TokenType.Cancel
+                      ? $"Inference cancelled after {elapsedTime / 1000:F0} seconds"
+                      : $"Inference completed in {elapsedTime / 1000:F0} seconds";
+                yield return new TokenModel(default, signature, endTokenType);
+            }
+            finally
+            {
+                _sessionGuard.Release(sessionId);
+            }
+        }
+
+
+        /// <summary>
+        /// Runs inference on the current ModelSession
+        /// </summary>
+        /// <param name="sessionId">The session identifier.</param>
+        /// <param name="prompt">The prompt.</param>
+        /// <param name="inferenceConfig">The inference configuration, if null session default is used</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        /// <returns>Streaming async result of <see cref="System.String" /></returns>
+        /// <exception cref="System.Exception">Inference is already running for this session</exception>
+        public IAsyncEnumerable<string> InferTextAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default)
+        {
+            async IAsyncEnumerable<string> InferTextInternal()
+            {
+                await foreach (var token in InferAsync(sessionId, prompt, inferenceConfig, cancellationToken).ConfigureAwait(false))
+                {
+                    if (token.TokenType ==  TokenType.Content)
+                        yield return token.Content;
+                }
+            }
+            return InferTextInternal();
+        }
+
+
+        /// <summary>
+        /// Runs inference on the current ModelSession
+        /// </summary>
+        /// <param name="sessionId">The session identifier.</param>
+        /// <param name="prompt">The prompt.</param>
+        /// <param name="inferenceConfig">The inference configuration, if null session default is used</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        /// <returns>Completed inference result as string</returns>
+        /// <exception cref="System.Exception">Inference is already running for this session</exception>
+        public async Task<string> InferTextCompleteAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default)
+        {
+            var inferResult = await InferAsync(sessionId, prompt, inferenceConfig, cancellationToken)
+                .Where(x => x.TokenType == TokenType.Content)
+                .Select(x => x.Content)
+                .ToListAsync(cancellationToken: cancellationToken);
+
+            return string.Concat(inferResult);
+        }
+
+
+        /// <summary>
+        /// Cancels the current inference action.
+        /// </summary>
+        /// <param name="sessionId">The session identifier.</param>
+        /// <returns>True if the session exists and cancellation was requested, otherwise false</returns>
+        public Task<bool> CancelAsync(string sessionId)
+        {
+            if (_modelSessions.TryGetValue(sessionId, out var modelSession))
+            {
+                modelSession.CancelInfer();
+                return Task.FromResult(true);
+            }
+            return Task.FromResult(false);
+        }
+
+
+        /// <summary>
+        /// Gets the elapsed time in milliseconds.
+        /// </summary>
+        /// <param name="timestamp">The timestamp.</param>
+        /// <returns>Whole milliseconds elapsed since <paramref name="timestamp"/></returns>
+        private static int GetElapsed(long timestamp)
+        {
+            return (int)Stopwatch.GetElapsedTime(timestamp).TotalMilliseconds;
+        }
+    }
+}
diff --git a/LLama.Web/appsettings.json b/LLama.Web/appsettings.json
index 9f340a9c..6231b882 100644
--- a/LLama.Web/appsettings.json
+++ b/LLama.Web/appsettings.json
@@ -7,48 +7,34 @@
   },
   "AllowedHosts": "*",
   "LLamaOptions": {
+    "ModelLoadType": "Single",
     "Models": [
       {
         "Name": "WizardLM-7B",
-        "MaxInstances": 2,
+        "MaxInstances": 20,
         "ModelPath": "D:\\Repositories\\AI\\Models\\wizardLM-7B.ggmlv3.q4_0.bin",
-        "ContextSize": 2048
-      }
-    ],
-    "Parameters": [
-      {
-        "Name": "Default",
-        "Temperature": 0.6
-      }
-    ],
-    "Prompts": [
-      {
-        "Name": "None",
-        "Prompt": ""
-      },
-      {
-        "Name": "Alpaca",
-        "Path": "D:\\Repositories\\AI\\Prompts\\alpaca.txt",
-        "AntiPrompt": [
-          "User:"
-        ],
-        "OutputFilter": [
-          "Response:",
-          "User:"
-        ]
-      },
-      {
-        "Name": "ChatWithBob",
-        "Path": "D:\\Repositories\\AI\\Prompts\\chat-with-bob.txt",
-        "AntiPrompt": [
-          "User:"
-        ],
-        "OutputFilter": [
-          "Bob:",
-          "User:"
-        ]
+        "ContextSize": 2048,
+        "BatchSize": 2048,
+        "Threads": 4,
+        "GpuLayerCount": 6,
+        "UseMemorymap": true,
+        "UseMemoryLock": false,
+        "MainGpu": 0,
+        "LowVram": false,
+        "Seed": 1686349486,
+        "UseFp16Memory": true,
+        "Perplexity": false,
+        "LoraAdapter": "",
+        "LoraBase": "",
+        "EmbeddingMode": false,
+        "TensorSplits": null,
+        "GroupedQueryAttention": 1,
+        "RmsNormEpsilon": 0.000005,
+        "RopeFrequencyBase": 10000.0,
+        "RopeFrequencyScale": 1.0,
+        "MulMatQ": false,
+        "Encoding": "UTF-8"
       }
     ]
-
   }
 }
diff --git a/LLama.Web/wwwroot/css/site.css b/LLama.Web/wwwroot/css/site.css
index d10ef975..14685f45 100644
--- a/LLama.Web/wwwroot/css/site.css
+++ b/LLama.Web/wwwroot/css/site.css
@@ -22,13 +22,30 @@ footer {
 
 
 @media (min-width: 768px) {
-  html {
-    font-size: 16px;
-  }
+    html {
+        font-size: 16px;
+    }
 }
 
 .btn:focus, .btn:active:focus, .btn-link.nav-link:focus, .form-control:focus, .form-check-input:focus {
-  box-shadow: 0 0 0 0.1rem white, 0 0 0 0.25rem #258cfb;
+    box-shadow: 0 0 0 0.1rem white, 0 0 0 0.25rem #258cfb;
+}
+
+#scroll-container {
+    flex: 1;
+    overflow-y: scroll;
+}
+
+#output-container .content {
+    white-space: break-spaces;
 }
 
 
+.slider-container > .slider {
+    width: 100%;
+}
+
+.slider-container > label {
+    width: 50px;
+    text-align: center;
+}
diff --git a/LLama.Web/wwwroot/js/sessionConnectionChat.js b/LLama.Web/wwwroot/js/sessionConnectionChat.js
index 472b5971..719c44ac 100644
--- a/LLama.Web/wwwroot/js/sessionConnectionChat.js
+++ b/LLama.Web/wwwroot/js/sessionConnectionChat.js
@@ -1,26 +1,26 @@
-const createConnectionSessionChat = (LLamaExecutorType) => {
+const createConnectionSessionChat = () => {
     const outputErrorTemplate = $("#outputErrorTemplate").html();
     const outputInfoTemplate = $("#outputInfoTemplate").html();
     const outputUserTemplate = $("#outputUserTemplate").html();
     const outputBotTemplate = $("#outputBotTemplate").html();
-    const sessionDetailsTemplate = $("#sessionDetailsTemplate").html();
+    const signatureTemplate = $("#signatureTemplate").html();
 
-    let connectionId;
+    let inferenceSession;
     const connection = new signalR.HubConnectionBuilder().withUrl("/SessionConnectionHub").build();
 
     const scrollContainer = $("#scroll-container");
     const outputContainer = $("#output-container");
     const chatInput = $("#input");
 
-
     const onStatus = (connection, status) => {
-        connectionId = connection;
         if (status == Enums.SessionConnectionStatus.Connected) {
             $("#socket").text("Connected").addClass("text-success");
         }
         else if (status == Enums.SessionConnectionStatus.Loaded) {
+            loaderHide();
             enableControls();
-            $("#session-details").html(Mustache.render(sessionDetailsTemplate, { model: getSelectedModel(), prompt: getSelectedPrompt(), parameter: getSelectedParameter() }));
+            $("#load").hide();
+            $("#unload").show();
             onInfo(`New model session successfully started`)
         }
     }
@@ -36,30 +36,31 @@ const createConnectionSessionChat = (LLamaExecutorType) => {
 
     let responseContent;
     let responseContainer;
-    let responseFirstFragment;
+    let responseFirstToken;
 
     const onResponse = (response) => {
         if (!response)
             return;
 
-        if (response.isFirst) {
-            outputContainer.append(Mustache.render(outputBotTemplate, response));
-            responseContainer = $(`#${response.id}`);
+        if (response.tokenType == Enums.TokenType.Begin) {
+            const uniqueId = randomString();
+            outputContainer.append(Mustache.render(outputBotTemplate, { id: uniqueId, ...response }));
+            responseContainer = $(`#${uniqueId}`);
             responseContent = responseContainer.find(".content");
-            responseFirstFragment = true;
+            responseFirstToken = true;
             scrollToBottom(true);
             return;
         }
 
-        if (response.isLast) {
+        if (response.tokenType == Enums.TokenType.End || response.tokenType == Enums.TokenType.Cancel) {
             enableControls();
-            responseContainer.find(".signature").append(response.content);
+            responseContainer.find(".signature").append(Mustache.render(signatureTemplate, response));
             scrollToBottom();
         }
         else {
-            if (responseFirstFragment) {
+            if (responseFirstToken) {
                 responseContent.empty();
-                responseFirstFragment = false;
+                responseFirstToken = false;
                 responseContainer.find(".date").append(getDateTime());
             }
             responseContent.append(response.content);
@@ -67,45 +68,88 @@ const createConnectionSessionChat = (LLamaExecutorType) => {
         }
     }
 
-
     const sendPrompt = async () => {
         const text = chatInput.val();
         if (text) {
+            chatInput.val(null);
             disableControls();
             outputContainer.append(Mustache.render(outputUserTemplate, { text: text, date: getDateTime() }));
-            await connection.invoke('SendPrompt', text);
-            chatInput.val(null);
+            inferenceSession = await connection
+                .stream("SendPrompt", text, serializeFormToJson('SessionParameters'))
+                .subscribe({
+                    next: onResponse,
+                    complete: onResponse,
+                    error: onError,
+                });
             scrollToBottom(true);
         }
     }
 
     const cancelPrompt = async () => {
-        await ajaxPostJsonAsync('?handler=Cancel', { connectionId: connectionId });
+        if (inferenceSession)
+            inferenceSession.dispose();
     }
 
     const loadModel = async () => {
-        const modelName = getSelectedModel();
-        const promptName = getSelectedPrompt();
-        const parameterName = getSelectedParameter();
-        if (!modelName || !promptName || !parameterName) {
-            onError("Please select a valid Model, Parameter and Prompt");
-            return;
-        }
+        const sessionParams = serializeFormToJson('SessionParameters');
+        loaderShow();
+        disableControls();
+        disablePromptControls();
+        $("#load").attr("disabled", "disabled");
 
+        // TODO: Split parameter sets
+        await connection.invoke('LoadModel', sessionParams, sessionParams);
+    }
+
+    const unloadModel = async () => {
         disableControls();
-        await connection.invoke('LoadModel', LLamaExecutorType, modelName, promptName, parameterName);
+        enablePromptControls();
+        $("#load").removeAttr("disabled");
     }
 
+    const serializeFormToJson = (form) => {
+        const formDataJson = {};
+        const formData = new FormData(document.getElementById(form));
+        formData.forEach((value, key) => {
+
+            if (key.includes("."))
+                key = key.split(".")[1];
+
+            // Convert number strings to numbers
+            if (!isNaN(value) && value.trim() !== "") {
+                formDataJson[key] = parseFloat(value);
+            }
+            // Convert boolean strings to booleans
+            else if (value === "true" || value === "false") {
+                formDataJson[key] = (value === "true");
+            }
+            else {
+                formDataJson[key] = value;
+            }
+        });
+        return formDataJson;
+    }
 
     const enableControls = () => {
         $(".input-control").removeAttr("disabled");
     }
 
-
     const disableControls = () => {
         $(".input-control").attr("disabled", "disabled");
     }
 
+    const enablePromptControls = () => {
+        $("#load").show();
+        $("#unload").hide();
+        $(".prompt-control").removeAttr("disabled");
+        activatePromptTab();
+    }
+
+    const disablePromptControls = () => {
+        $(".prompt-control").attr("disabled", "disabled");
+        activateParamsTab();
+    }
+
     const clearOutput = () => {
         outputContainer.empty();
     }
@@ -117,27 +161,14 @@ const createConnectionSessionChat = (LLamaExecutorType) => {
         customPrompt.text(selectedValue);
     }
 
-
-    const getSelectedModel = () => {
-        return $("option:selected", "#Model").val();
-    }
-
-
-    const getSelectedParameter = () => {
-        return $("option:selected", "#Parameter").val();
-    }
-
-
-    const getSelectedPrompt = () => {
-        return $("option:selected", "#Prompt").val();
-    }
-
-
     const getDateTime = () => {
         const dateTime = new Date();
         return dateTime.toLocaleString();
     }
 
+    const randomString = () => {
+        return Math.random().toString(36).slice(2);
+    }
 
     const scrollToBottom = (force) => {
         const scrollTop = scrollContainer.scrollTop();
@@ -151,10 +182,25 @@ const createConnectionSessionChat = (LLamaExecutorType) => {
         }
     }
 
+    const activatePromptTab = () => {
+        $("#nav-prompt-tab").trigger("click");
+    }
 
+    const activateParamsTab = () => {
+        $("#nav-params-tab").trigger("click");
+    }
+
+    const loaderShow = () => {
+        $(".spinner").show();
+    }
+
+    const loaderHide = () => {
+        $(".spinner").hide();
+    }
 
     // Map UI functions
     $("#load").on("click", loadModel);
+    $("#unload").on("click", unloadModel);
     $("#send").on("click", sendPrompt);
     $("#clear").on("click", clearOutput);
     $("#cancel").on("click", cancelPrompt);
@@ -165,7 +211,10 @@ const createConnectionSessionChat = (LLamaExecutorType) => {
             sendPrompt();
         }
     });
-
+    $(".slider").on("input", function (e) {
+        const slider = $(this);
+        slider.next().text(slider.val());
+    }).trigger("input");
 
 
     // Map signalr functions
diff --git a/LLama.Web/wwwroot/js/site.js b/LLama.Web/wwwroot/js/site.js
index 2f679669..6612c772 100644
--- a/LLama.Web/wwwroot/js/site.js
+++ b/LLama.Web/wwwroot/js/site.js
@@ -40,11 +40,17 @@ const Enums = {
 		Loaded: 4,
 		Connected: 10
 	}),
-	LLamaExecutorType: Object.freeze({
+	ExecutorType: Object.freeze({
 		Interactive: 0,
 		Instruct: 1,
 		Stateless: 2
 	}),
+	TokenType: Object.freeze({
+		Begin: 0,
+		Content: 2,
+		End: 4,
+		Cancel: 10
+	}),
 	GetName: (enumType, enumKey) => {
 		return Object.keys(enumType)[enumKey]
 	},

From e2a17d6b6f0490cfbe3d88b66d1be4eab58daaa1 Mon Sep 17 00:00:00 2001
From: sa_ddam213 <sa_ddam213@live.com>
Date: Wed, 4 Oct 2023 13:35:18 +1300
Subject: [PATCH 3/7] Refactor conflicting object name SessionOptions

---
 LLama.Web/Common/ISessionConfig.cs            | 13 ++++++++
 .../{SessionOptions.cs => SessionConfig.cs}   |  2 +-
 LLama.Web/{Extensioms.cs => Extensions.cs}    |  6 ++--
 LLama.Web/Hubs/SessionConnectionHub.cs        |  2 +-
 LLama.Web/Models/ModelSession.cs              | 30 +++++++++----------
 LLama.Web/Pages/Index.cshtml                  | 10 +++----
 LLama.Web/Pages/Index.cshtml.cs               |  4 +--
 LLama.Web/Services/IModelSessionService.cs    |  4 +--
 LLama.Web/Services/ModelSessionService.cs     |  2 +-
 9 files changed, 43 insertions(+), 30 deletions(-)
 create mode 100644 LLama.Web/Common/ISessionConfig.cs
 rename LLama.Web/Common/{SessionOptions.cs => SessionConfig.cs} (89%)
 rename LLama.Web/{Extensioms.cs => Extensions.cs} (88%)

diff --git a/LLama.Web/Common/ISessionConfig.cs b/LLama.Web/Common/ISessionConfig.cs
new file mode 100644
index 00000000..09bddc2d
--- /dev/null
+++ b/LLama.Web/Common/ISessionConfig.cs
@@ -0,0 +1,13 @@
+﻿namespace LLama.Web.Common
+{
+    public interface ISessionConfig // Per-session settings consumed by ModelSession and the Extensions helpers.
+    {
+        string AntiPrompt { get; set; } // Comma-separated anti-prompts; merged with AntiPrompts by GetAntiPrompts().
+        List<string> AntiPrompts { get; set; } // Anti-prompts as a list; combined with the AntiPrompt CSV, duplicates removed.
+        LLamaExecutorType ExecutorType { get; set; } // Selects which executor ModelSession.CreateExecutor builds.
+        string Model { get; set; } // Model name; surfaced as ModelSession.ModelName.
+        string OutputFilter { get; set; } // Comma-separated output filters; merged with OutputFilters by GetOutputFilters().
+        List<string> OutputFilters { get; set; } // Output filters as a list; a non-empty combined set enables the keyword output transform.
+        string Prompt { get; set; } // Initial prompt run by ModelSession.InitializePrompt; skipped when empty or when ExecutorType is Stateless.
+    }
+}
\ No newline at end of file
diff --git a/LLama.Web/Common/SessionOptions.cs b/LLama.Web/Common/SessionConfig.cs
similarity index 89%
rename from LLama.Web/Common/SessionOptions.cs
rename to LLama.Web/Common/SessionConfig.cs
index 34386955..f0a2d22b 100644
--- a/LLama.Web/Common/SessionOptions.cs
+++ b/LLama.Web/Common/SessionConfig.cs
@@ -1,6 +1,6 @@
 ﻿namespace LLama.Web.Common
 {
-    public class SessionOptions
+    public class SessionConfig : ISessionConfig
     {
         public string Model { get; set; }
         public string Prompt { get; set; }
diff --git a/LLama.Web/Extensioms.cs b/LLama.Web/Extensions.cs
similarity index 88%
rename from LLama.Web/Extensioms.cs
rename to LLama.Web/Extensions.cs
index 50bb55c4..99f745dd 100644
--- a/LLama.Web/Extensioms.cs
+++ b/LLama.Web/Extensions.cs
@@ -2,14 +2,14 @@
 
 namespace LLama.Web
 {
-    public static  class Extensioms
+    public static  class Extensions
     {
         /// <summary>
         /// Combines the AntiPrompts list and AntiPrompt csv 
         /// </summary>
         /// <param name="sessionConfig">The session configuration.</param>
         /// <returns>Combined AntiPrompts with duplicates removed</returns>
-        public static List<string> GetAntiPrompts(this Common.SessionOptions sessionConfig)
+        public static List<string> GetAntiPrompts(this ISessionConfig sessionConfig)
         {
             return CombineCSV(sessionConfig.AntiPrompts, sessionConfig.AntiPrompt);
         }
@@ -19,7 +19,7 @@ namespace LLama.Web
         /// </summary>
         /// <param name="sessionConfig">The session configuration.</param>
         /// <returns>Combined OutputFilters with duplicates removed</returns>
-        public static List<string> GetOutputFilters(this Common.SessionOptions sessionConfig)
+        public static List<string> GetOutputFilters(this ISessionConfig sessionConfig)
         {
             return CombineCSV(sessionConfig.OutputFilters, sessionConfig.OutputFilter);
         }
diff --git a/LLama.Web/Hubs/SessionConnectionHub.cs b/LLama.Web/Hubs/SessionConnectionHub.cs
index 730d4e87..24457683 100644
--- a/LLama.Web/Hubs/SessionConnectionHub.cs
+++ b/LLama.Web/Hubs/SessionConnectionHub.cs
@@ -37,7 +37,7 @@ namespace LLama.Web.Hubs
 
 
         [HubMethodName("LoadModel")]
-        public async Task OnLoadModel(Common.SessionOptions sessionConfig, InferenceOptions inferenceConfig)
+        public async Task OnLoadModel(ISessionConfig sessionConfig, InferenceOptions inferenceConfig)
         {
             _logger.Log(LogLevel.Information, "[OnLoadModel] - Load new model, Connection: {0}", Context.ConnectionId);
             await _modelSessionService.CloseAsync(Context.ConnectionId);
diff --git a/LLama.Web/Models/ModelSession.cs b/LLama.Web/Models/ModelSession.cs
index 35413f92..91c8920f 100644
--- a/LLama.Web/Models/ModelSession.cs
+++ b/LLama.Web/Models/ModelSession.cs
@@ -9,21 +9,21 @@ namespace LLama.Web.Models
         private readonly LLamaModel _model;
         private readonly LLamaContext _context;
         private readonly ILLamaExecutor _executor;
-        private readonly Common.SessionOptions _sessionParams;
+        private readonly ISessionConfig _sessionConfig;
         private readonly ITextStreamTransform _outputTransform;
         private readonly InferenceOptions _defaultInferenceConfig;
 
         private CancellationTokenSource _cancellationTokenSource;
 
-        public ModelSession(LLamaModel model, LLamaContext context, string sessionId, Common.SessionOptions sessionOptions, InferenceOptions inferenceOptions = null)
+        public ModelSession(LLamaModel model, LLamaContext context, string sessionId, ISessionConfig sessionConfig, InferenceOptions inferenceOptions = null)
         {
             _model = model;
             _context = context;
             _sessionId = sessionId;
-            _sessionParams = sessionOptions;
+            _sessionConfig = sessionConfig;
             _defaultInferenceConfig = inferenceOptions ?? new InferenceOptions();
-            _outputTransform = CreateOutputFilter(_sessionParams);
-            _executor = CreateExecutor(_model, _context, _sessionParams);
+            _outputTransform = CreateOutputFilter();
+            _executor = CreateExecutor();
         }
 
         /// <summary>
@@ -34,7 +34,7 @@ namespace LLama.Web.Models
         /// <summary>
         /// Gets the name of the model.
         /// </summary>
-        public string ModelName => _sessionParams.Model;
+        public string ModelName => _sessionConfig.Model;
 
         /// <summary>
         /// Gets the context.
@@ -44,7 +44,7 @@ namespace LLama.Web.Models
         /// <summary>
         /// Gets the session configuration.
         /// </summary>
-        public Common.SessionOptions SessionConfig => _sessionParams;
+        public ISessionConfig SessionConfig => _sessionConfig;
 
         /// <summary>
         /// Gets the inference parameters.
@@ -60,16 +60,16 @@ namespace LLama.Web.Models
         /// <param name="cancellationToken">The cancellation token.</param>
         internal async Task InitializePrompt(InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default)
         {
-            if (_sessionParams.ExecutorType == LLamaExecutorType.Stateless)
+            if (_sessionConfig.ExecutorType == LLamaExecutorType.Stateless)
                 return;
 
-            if (string.IsNullOrEmpty(_sessionParams.Prompt))
+            if (string.IsNullOrEmpty(_sessionConfig.Prompt))
                 return;
 
             // Run Initial prompt
             var inferenceParams = ConfigureInferenceParams(inferenceConfig);
             _cancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
-            await foreach (var _ in _executor.InferAsync(_sessionParams.Prompt, inferenceParams, _cancellationTokenSource.Token))
+            await foreach (var _ in _executor.InferAsync(_sessionConfig.Prompt, inferenceParams, _cancellationTokenSource.Token))
             {
                 // We dont really need the response of the initial prompt, so exit on first token
                 break;
@@ -114,13 +114,13 @@ namespace LLama.Web.Models
         private IInferenceParams ConfigureInferenceParams(InferenceOptions inferenceConfig)
         {
             var inferenceParams = inferenceConfig ?? _defaultInferenceConfig;
-            inferenceParams.AntiPrompts = _sessionParams.GetAntiPrompts();
+            inferenceParams.AntiPrompts = _sessionConfig.GetAntiPrompts();
             return inferenceParams;
         }
 
-        private ITextStreamTransform CreateOutputFilter(Common.SessionOptions sessionConfig)
+        private ITextStreamTransform CreateOutputFilter()
         {
-            var outputFilters = sessionConfig.GetOutputFilters();
+            var outputFilters = _sessionConfig.GetOutputFilters();
             if (outputFilters.Count > 0)
                 return new LLamaTransforms.KeywordTextOutputStreamTransform(outputFilters);
 
@@ -128,9 +128,9 @@ namespace LLama.Web.Models
         }
 
 
-        private ILLamaExecutor CreateExecutor(LLamaModel model, LLamaContext context, Common.SessionOptions sessionConfig)
+        private ILLamaExecutor CreateExecutor()
         {
-            return sessionConfig.ExecutorType switch
+            return _sessionConfig.ExecutorType switch
             {
                 LLamaExecutorType.Interactive => new InteractiveExecutor(_context),
                 LLamaExecutorType.Instruct => new InstructExecutor(_context),
diff --git a/LLama.Web/Pages/Index.cshtml b/LLama.Web/Pages/Index.cshtml
index 55512603..3df4b699 100644
--- a/LLama.Web/Pages/Index.cshtml
+++ b/LLama.Web/Pages/Index.cshtml
@@ -24,11 +24,11 @@
                 <div class="d-flex flex-column m-1">
                     <div class="d-flex flex-column mb-2">
                         <small>Model</small>
-                        @Html.DropDownListFor(m => m.SessionOptions.Model, new SelectList(Model.Options.Models, "Name", "Name"), new {  @class = "form-control prompt-control" ,required="required", autocomplete="off"})
+                        @Html.DropDownListFor(m => m.SessionConfig.Model, new SelectList(Model.Options.Models, "Name", "Name"), new {  @class = "form-control prompt-control" ,required="required", autocomplete="off"})
                     </div>
                     <div class="d-flex flex-column mb-2">
                         <small>Inference Type</small>
-                        @Html.DropDownListFor(m => m.SessionOptions.ExecutorType, Html.GetEnumSelectList<LLamaExecutorType>(), new {  @class = "form-control prompt-control" ,required="required", autocomplete="off"})
+                        @Html.DropDownListFor(m => m.SessionConfig.ExecutorType, Html.GetEnumSelectList<LLamaExecutorType>(), new {  @class = "form-control prompt-control" ,required="required", autocomplete="off"})
                     </div>
                     <nav>
                         <div class="nav nav-tabs" id="nav-tab" role="tablist">
@@ -40,17 +40,17 @@
                         <div class="tab-pane fade show active" id="nav-prompt" role="tabpanel" aria-labelledby="nav-prompt-tab">
                             <div class="d-flex flex-column mb-2">
                                 <small>Prompt</small>
-                                @Html.TextAreaFor(m => Model.SessionOptions.Prompt, new { @type="text", @class = "form-control prompt-control", rows=8})
+                                @Html.TextAreaFor(m => Model.SessionConfig.Prompt, new { @type="text", @class = "form-control prompt-control", rows=8})
                             </div>
 
                             <div class="d-flex flex-column mb-2">
                                 <small>AntiPrompts</small>
-                                @Html.TextBoxFor(m => Model.SessionOptions.AntiPrompt, new { @type="text", @class = "form-control prompt-control"})
+                                @Html.TextBoxFor(m => Model.SessionConfig.AntiPrompt, new { @type="text", @class = "form-control prompt-control"})
                             </div>
 
                             <div class="d-flex flex-column mb-2">
                                 <small>OutputFilter</small>
-                                @Html.TextBoxFor(m => Model.SessionOptions.OutputFilter, new { @type="text", @class = "form-control prompt-control"})
+                                @Html.TextBoxFor(m => Model.SessionConfig.OutputFilter, new { @type="text", @class = "form-control prompt-control"})
                             </div>
                         </div>
                         <div class="tab-pane fade" id="nav-params" role="tabpanel" aria-labelledby="nav-params-tab">
diff --git a/LLama.Web/Pages/Index.cshtml.cs b/LLama.Web/Pages/Index.cshtml.cs
index 3647dfec..d61a33da 100644
--- a/LLama.Web/Pages/Index.cshtml.cs
+++ b/LLama.Web/Pages/Index.cshtml.cs
@@ -18,14 +18,14 @@ namespace LLama.Web.Pages
         public LLamaOptions Options { get; set; }
 
         [BindProperty]
-        public Common.SessionOptions SessionOptions { get; set; }
+        public ISessionConfig SessionConfig { get; set; }
 
         [BindProperty]
         public InferenceOptions InferenceOptions { get; set; }
 
         public void OnGet()
         {
-            SessionOptions = new Common.SessionOptions
+            SessionConfig = new SessionConfig
             {
                 Prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
                 AntiPrompt = "User:",
diff --git a/LLama.Web/Services/IModelSessionService.cs b/LLama.Web/Services/IModelSessionService.cs
index 8723d795..a3ad9303 100644
--- a/LLama.Web/Services/IModelSessionService.cs
+++ b/LLama.Web/Services/IModelSessionService.cs
@@ -24,7 +24,7 @@ namespace LLama.Web.Services
         /// Creates a new ModelSession
         /// </summary>
         /// <param name="sessionId">The session identifier.</param>
-        /// <param name="sessionOptions">The session configuration.</param>
+        /// <param name="sessionConfig">The session configuration.</param>
         /// <param name="inferenceOptions">The default inference configuration, will be used for all inference where no infer configuration is supplied.</param>
         /// <param name="cancellationToken">The cancellation token.</param>
         /// <returns></returns>
@@ -33,7 +33,7 @@ namespace LLama.Web.Services
         /// or
         /// Failed to create model session
         /// </exception>
-        Task<ModelSession> CreateAsync(string sessionId, Common.SessionOptions sessionOptions, InferenceOptions inferenceOptions = null, CancellationToken cancellationToken = default);
+        Task<ModelSession> CreateAsync(string sessionId, ISessionConfig sessionConfig, InferenceOptions inferenceOptions = null, CancellationToken cancellationToken = default);
 
 
         /// <summary>
diff --git a/LLama.Web/Services/ModelSessionService.cs b/LLama.Web/Services/ModelSessionService.cs
index e808e630..84070d94 100644
--- a/LLama.Web/Services/ModelSessionService.cs
+++ b/LLama.Web/Services/ModelSessionService.cs
@@ -65,7 +65,7 @@ namespace LLama.Web.Services
         /// or
         /// Failed to create model session
         /// </exception>
-        public async Task<ModelSession> CreateAsync(string sessionId, Common.SessionOptions sessionConfig, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default)
+        public async Task<ModelSession> CreateAsync(string sessionId, ISessionConfig sessionConfig, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default)
         {
             if (_modelSessions.TryGetValue(sessionId, out _))
                 throw new Exception($"Session with id {sessionId} already exists");

From 9b8de007dc5e26ac425d916c15191907580a8b54 Mon Sep 17 00:00:00 2001
From: sa_ddam213 <sa_ddam213@live.com>
Date: Wed, 4 Oct 2023 13:47:08 +1300
Subject: [PATCH 4/7] Propagate ILogger

---
 LLama.Examples/NewVersion/CodingAssistant.cs | 2 +-
 LLama.Web/Models/LLamaModel.cs               | 6 ++++--
 LLama.Web/Services/ModelService.cs           | 6 ++++--
 LLama/LLamaInstructExecutor.cs               | 6 ++++--
 LLama/LLamaInteractExecutor.cs               | 4 +++-
 LLama/LLamaStatelessExecutor.cs              | 6 +++++-
 LLama/LLamaWeights.cs                        | 6 ++++--
 7 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/LLama.Examples/NewVersion/CodingAssistant.cs b/LLama.Examples/NewVersion/CodingAssistant.cs
index 69e997d3..9108e01d 100644
--- a/LLama.Examples/NewVersion/CodingAssistant.cs
+++ b/LLama.Examples/NewVersion/CodingAssistant.cs
@@ -31,7 +31,7 @@
             };
             using var model = LLamaWeights.LoadFromFile(parameters);
             using var context = model.CreateContext(parameters);
-            var executor = new InstructExecutor(context, InstructionPrefix, InstructionSuffix);
+            var executor = new InstructExecutor(context, null!, InstructionPrefix, InstructionSuffix);
 
             Console.ForegroundColor = ConsoleColor.Yellow;
             Console.WriteLine("The executor has been enabled. In this example, the LLM will follow your instructions." +
diff --git a/LLama.Web/Models/LLamaModel.cs b/LLama.Web/Models/LLamaModel.cs
index 71bb290e..5aedc5f5 100644
--- a/LLama.Web/Models/LLamaModel.cs
+++ b/LLama.Web/Models/LLamaModel.cs
@@ -10,6 +10,7 @@ namespace LLama.Web.Models
     /// <seealso cref="IDisposable" />
     public class LLamaModel : IDisposable
     {
+        private readonly ILogger _llamaLogger;
         private readonly ModelOptions _config;
         private readonly LLamaWeights _weights;
         private readonly ConcurrentDictionary<string, LLamaContext> _contexts;
@@ -18,9 +19,10 @@ namespace LLama.Web.Models
         /// Initializes a new instance of the <see cref="LLamaModel"/> class.
         /// </summary>
         /// <param name="modelParams">The model parameters.</param>
-        public LLamaModel(ModelOptions modelParams)
+        public LLamaModel(ModelOptions modelParams, ILogger llamaLogger)
         {
             _config = modelParams;
+            _llamaLogger = llamaLogger;
             _weights = LLamaWeights.LoadFromFile(modelParams);
             _contexts = new ConcurrentDictionary<string, LLamaContext>();
         }
@@ -56,7 +58,7 @@ namespace LLama.Web.Models
             if (_config.MaxInstances > -1 && ContextCount >= _config.MaxInstances)
                 throw new Exception($"Maximum model instances reached");
 
-            context = _weights.CreateContext(_config);
+            context = _weights.CreateContext(_config, _llamaLogger);
             if (_contexts.TryAdd(contextName, context))
                 return Task.FromResult(context);
 
diff --git a/LLama.Web/Services/ModelService.cs b/LLama.Web/Services/ModelService.cs
index 2a3d4788..dfb34bb6 100644
--- a/LLama.Web/Services/ModelService.cs
+++ b/LLama.Web/Services/ModelService.cs
@@ -11,6 +11,7 @@ namespace LLama.Web.Services
     /// </summary>
     public class ModelService : IModelService
     {
+        private readonly ILogger _llamaLogger;
         private readonly AsyncLock _modelLock;
         private readonly AsyncLock _contextLock;
         private readonly LLamaOptions _configuration;
@@ -22,8 +23,9 @@ namespace LLama.Web.Services
         /// </summary>
         /// <param name="logger">The logger.</param>
         /// <param name="options">The options.</param>
-        public ModelService(LLamaOptions configuration)
+        public ModelService(LLamaOptions configuration, ILogger llamaLogger)
         {
+            _llamaLogger = llamaLogger;
             _modelLock = new AsyncLock();
             _contextLock = new AsyncLock();
             _configuration = configuration;
@@ -52,7 +54,7 @@ namespace LLama.Web.Services
                     await UnloadModels();
 
 
-                model = new LLamaModel(modelOptions);
+                model = new LLamaModel(modelOptions, _llamaLogger);
                 _modelInstances.TryAdd(modelOptions.Name, model);
                 return model;
             }
diff --git a/LLama/LLamaInstructExecutor.cs b/LLama/LLamaInstructExecutor.cs
index 6faa3db2..dab34106 100644
--- a/LLama/LLamaInstructExecutor.cs
+++ b/LLama/LLamaInstructExecutor.cs
@@ -9,6 +9,7 @@ using System.Text.Json;
 using System.Text.Json.Serialization;
 using System.Threading.Tasks;
 using LLama.Extensions;
+using Microsoft.Extensions.Logging;
 
 namespace LLama
 {
@@ -27,10 +28,11 @@ namespace LLama
         /// 
         /// </summary>
         /// <param name="context"></param>
+        /// <param name="logger"></param>
         /// <param name="instructionPrefix"></param>
         /// <param name="instructionSuffix"></param>
-        public InstructExecutor(LLamaContext context, string instructionPrefix = "\n\n### Instruction:\n\n",
-            string instructionSuffix = "\n\n### Response:\n\n") : base(context)
+        public InstructExecutor(LLamaContext context, ILogger logger = null!, string instructionPrefix = "\n\n### Instruction:\n\n",
+            string instructionSuffix = "\n\n### Response:\n\n") : base(context, logger)
         {
             _inp_pfx = Context.Tokenize(instructionPrefix, true);
             _inp_sfx = Context.Tokenize(instructionSuffix, false);
diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index ab403212..0f374e09 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -9,6 +9,7 @@ using System.Text.Json;
 using System.Text.Json.Serialization;
 using System.Threading.Tasks;
 using LLama.Extensions;
+using Microsoft.Extensions.Logging;
 
 namespace LLama
 {
@@ -25,7 +26,8 @@ namespace LLama
         /// 
         /// </summary>
         /// <param name="context"></param>
-        public InteractiveExecutor(LLamaContext context) : base(context)
+        /// <param name="logger"></param>
+        public InteractiveExecutor(LLamaContext context, ILogger logger = null!) : base(context, logger)
         {
             _llama_token_newline = NativeApi.llama_token_nl(Context.NativeHandle);
         }
diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs
index 3ff755a0..e5348bb4 100644
--- a/LLama/LLamaStatelessExecutor.cs
+++ b/LLama/LLamaStatelessExecutor.cs
@@ -7,6 +7,7 @@ using System.Runtime.CompilerServices;
 using System.Threading;
 using System.Threading.Tasks;
 using LLama.Extensions;
+using Microsoft.Extensions.Logging;
 
 namespace LLama
 {
@@ -19,6 +20,7 @@ namespace LLama
     public class StatelessExecutor
         : ILLamaExecutor
     {
+        private readonly ILogger? _logger;
         private readonly LLamaWeights _weights;
         private readonly IModelParams _params;
 
@@ -32,8 +34,10 @@ namespace LLama
         /// </summary>
         /// <param name="weights"></param>
         /// <param name="params"></param>
-        public StatelessExecutor(LLamaWeights weights, IModelParams @params)
+        /// <param name="logger"></param>
+        public StatelessExecutor(LLamaWeights weights, IModelParams @params, ILogger logger = null!)
         {
+            _logger = logger;
             _weights = weights;
             _params = @params;
 
diff --git a/LLama/LLamaWeights.cs b/LLama/LLamaWeights.cs
index 1b067f1b..d841d5a9 100644
--- a/LLama/LLamaWeights.cs
+++ b/LLama/LLamaWeights.cs
@@ -3,6 +3,7 @@ using System.Text;
 using LLama.Abstractions;
 using LLama.Extensions;
 using LLama.Native;
+using Microsoft.Extensions.Logging;
 
 namespace LLama
 {
@@ -72,10 +73,11 @@ namespace LLama
         /// Create a llama_context using this model
         /// </summary>
         /// <param name="params"></param>
+        /// <param name="logger"></param>
         /// <returns></returns>
-        public LLamaContext CreateContext(IModelParams @params)
+        public LLamaContext CreateContext(IModelParams @params, ILogger logger = default!)
         {
-            return new LLamaContext(this, @params);
+            return new LLamaContext(this, @params, logger);
         }
     }
 }

From a8a498dc12c0c74c837e4a551c05c38e4c63aca5 Mon Sep 17 00:00:00 2001
From: sa_ddam213 <sa_ddam213@live.com>
Date: Wed, 4 Oct 2023 16:32:13 +1300
Subject: [PATCH 5/7] Fix up issues found during testing

---
 LLama.Web/Extensions.cs                       | 2 +-
 LLama.Web/Hubs/SessionConnectionHub.cs        | 2 +-
 LLama.Web/Pages/Shared/_ChatTemplates.cshtml  | 2 +-
 LLama.Web/Pages/Shared/_Parameters.cshtml     | 1 -
 LLama.Web/Program.cs                          | 2 ++
 LLama.Web/Services/ModelService.cs            | 7 ++++---
 LLama.Web/appsettings.json                    | 6 +++---
 LLama.Web/wwwroot/js/sessionConnectionChat.js | 5 +++--
 8 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/LLama.Web/Extensions.cs b/LLama.Web/Extensions.cs
index 99f745dd..ee8d7f7f 100644
--- a/LLama.Web/Extensions.cs
+++ b/LLama.Web/Extensions.cs
@@ -33,7 +33,7 @@ namespace LLama.Web
         /// <returns>Combined list with duplicates removed</returns>
         private static List<string> CombineCSV(List<string> list, string csv)
         {
-            var results = list?.Count == 0
+            var results = list is null || list.Count == 0
                 ? CommaSeperatedToList(csv)
                 : CommaSeperatedToList(csv).Concat(list);
             return results
diff --git a/LLama.Web/Hubs/SessionConnectionHub.cs b/LLama.Web/Hubs/SessionConnectionHub.cs
index 24457683..966ec8a4 100644
--- a/LLama.Web/Hubs/SessionConnectionHub.cs
+++ b/LLama.Web/Hubs/SessionConnectionHub.cs
@@ -37,7 +37,7 @@ namespace LLama.Web.Hubs
 
 
         [HubMethodName("LoadModel")]
-        public async Task OnLoadModel(ISessionConfig sessionConfig, InferenceOptions inferenceConfig)
+        public async Task OnLoadModel(SessionConfig sessionConfig, InferenceOptions inferenceConfig)
         {
             _logger.Log(LogLevel.Information, "[OnLoadModel] - Load new model, Connection: {0}", Context.ConnectionId);
             await _modelSessionService.CloseAsync(Context.ConnectionId);
diff --git a/LLama.Web/Pages/Shared/_ChatTemplates.cshtml b/LLama.Web/Pages/Shared/_ChatTemplates.cshtml
index cd768f1f..624f5859 100644
--- a/LLama.Web/Pages/Shared/_ChatTemplates.cshtml
+++ b/LLama.Web/Pages/Shared/_ChatTemplates.cshtml
@@ -25,7 +25,7 @@
         <div class="m-2 me-4">
             <img src="~/image/robot.png" width="60"/>
         </div>
-        <div id="{{id}}" class="d-flex flex-column flex-fill justify-content-between">
+        <div id="{{uniqueId}}" class="d-flex flex-column flex-fill justify-content-between">
             <span class="content"><img src="~/image/loading.gif" width="30" /></span>
             <div class="d-flex justify-content-end">
                 <div class="d-flex flex-column align-items-end">
diff --git a/LLama.Web/Pages/Shared/_Parameters.cshtml b/LLama.Web/Pages/Shared/_Parameters.cshtml
index d6e476c4..76f3e321 100644
--- a/LLama.Web/Pages/Shared/_Parameters.cshtml
+++ b/LLama.Web/Pages/Shared/_Parameters.cshtml
@@ -1,7 +1,6 @@
 ﻿@page
 @using LLama.Common;
 @model LLama.Abstractions.IInferenceParams
-}
 
 <div class="d-flex flex-row gap-3">
     <div class="d-flex flex-column mb-2">
diff --git a/LLama.Web/Program.cs b/LLama.Web/Program.cs
index 7c4583d2..193090d0 100644
--- a/LLama.Web/Program.cs
+++ b/LLama.Web/Program.cs
@@ -14,6 +14,8 @@ namespace LLama.Web
             // Add services to the container.
             builder.Services.AddRazorPages();
             builder.Services.AddSignalR();
+            builder.Logging.ClearProviders();
+            builder.Services.AddLogging((loggingBuilder) => loggingBuilder.SetMinimumLevel(LogLevel.Trace).AddConsole());
 
             // Load InteractiveOptions
             builder.Services.AddOptions<LLamaOptions>()
diff --git a/LLama.Web/Services/ModelService.cs b/LLama.Web/Services/ModelService.cs
index dfb34bb6..3634f6ab 100644
--- a/LLama.Web/Services/ModelService.cs
+++ b/LLama.Web/Services/ModelService.cs
@@ -1,6 +1,7 @@
 ﻿using LLama.Web.Async;
 using LLama.Web.Common;
 using LLama.Web.Models;
+using Microsoft.Extensions.Options;
 using System.Collections.Concurrent;
 
 namespace LLama.Web.Services
@@ -11,10 +12,10 @@ namespace LLama.Web.Services
     /// </summary>
     public class ModelService : IModelService
     {
-        private readonly ILogger _llamaLogger;
         private readonly AsyncLock _modelLock;
         private readonly AsyncLock _contextLock;
         private readonly LLamaOptions _configuration;
+        private readonly ILogger<ModelService> _llamaLogger;
         private readonly ConcurrentDictionary<string, LLamaModel> _modelInstances;
 
 
@@ -23,12 +24,12 @@ namespace LLama.Web.Services
         /// </summary>
         /// <param name="logger">The logger.</param>
         /// <param name="options">The options.</param>
-        public ModelService(LLamaOptions configuration, ILogger llamaLogger)
+        public ModelService(IOptions<LLamaOptions> configuration, ILogger<ModelService> llamaLogger)
         {
             _llamaLogger = llamaLogger;
             _modelLock = new AsyncLock();
             _contextLock = new AsyncLock();
-            _configuration = configuration;
+            _configuration = configuration.Value;
             _modelInstances = new ConcurrentDictionary<string, LLamaModel>();
         }
 
diff --git a/LLama.Web/appsettings.json b/LLama.Web/appsettings.json
index 6231b882..82d62b1a 100644
--- a/LLama.Web/appsettings.json
+++ b/LLama.Web/appsettings.json
@@ -7,12 +7,12 @@
   },
   "AllowedHosts": "*",
   "LLamaOptions": {
-    "ModelLoadType": "Single",
+    "ModelLoadType": 0,
     "Models": [
       {
-        "Name": "WizardLM-7B",
+        "Name": "LLama2-7b-Chat",
         "MaxInstances": 20,
-        "ModelPath": "D:\\Repositories\\AI\\Models\\wizardLM-7B.ggmlv3.q4_0.bin",
+        "ModelPath": "..\\LLama.Unittest\\Models\\llama-2-7b-chat.Q4_0.gguf",
         "ContextSize": 2048,
         "BatchSize": 2048,
         "Threads": 4,
diff --git a/LLama.Web/wwwroot/js/sessionConnectionChat.js b/LLama.Web/wwwroot/js/sessionConnectionChat.js
index 719c44ac..24821150 100644
--- a/LLama.Web/wwwroot/js/sessionConnectionChat.js
+++ b/LLama.Web/wwwroot/js/sessionConnectionChat.js
@@ -43,8 +43,8 @@ const createConnectionSessionChat = () => {
             return;
 
         if (response.tokenType == Enums.TokenType.Begin) {
-            const uniqueId = randomString();
-            outputContainer.append(Mustache.render(outputBotTemplate, { id: uniqueId, ...response }));
+            let uniqueId = randomString();
+            outputContainer.append(Mustache.render(outputBotTemplate, { uniqueId: uniqueId, ...response }));
             responseContainer = $(`#${uniqueId}`);
             responseContent = responseContainer.find(".content");
             responseFirstToken = true;
@@ -102,6 +102,7 @@ const createConnectionSessionChat = () => {
     }
 
     const unloadModel = async () => {
+        await cancelPrompt();
         disableControls();
         enablePromptControls();
         $("#load").removeAttr("disabled");

From 4ec9aed47a0fe4bc6a2ffc14addf429ce8599846 Mon Sep 17 00:00:00 2001
From: sa_ddam213 <sa_ddam213@live.com>
Date: Fri, 20 Oct 2023 08:29:26 +1300
Subject: [PATCH 6/7] Revert LLamaSharp project changes

---
 LLama/LLamaExecutorBase.cs      | 3 +--
 LLama/LLamaInstructExecutor.cs  | 2 +-
 LLama/LLamaInteractExecutor.cs  | 2 +-
 LLama/LLamaStatelessExecutor.cs | 3 +--
 LLama/LLamaWeights.cs           | 5 ++---
 5 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs
index 242ae10b..0c8e4679 100644
--- a/LLama/LLamaExecutorBase.cs
+++ b/LLama/LLamaExecutorBase.cs
@@ -75,10 +75,9 @@ namespace LLama
         /// </summary>
         /// <param name="context"></param>
         /// <param name="logger"></param>
-        protected StatefulExecutorBase(LLamaContext context, ILogger? logger = null)
+        protected StatefulExecutorBase(LLamaContext context)
         {
             Context = context;
-            _logger = logger;
             _pastTokensCount = 0;
             _consumedTokensCount = 0;
             _n_session_consumed = 0;
diff --git a/LLama/LLamaInstructExecutor.cs b/LLama/LLamaInstructExecutor.cs
index a4e7c0fd..c7cb55fe 100644
--- a/LLama/LLamaInstructExecutor.cs
+++ b/LLama/LLamaInstructExecutor.cs
@@ -32,7 +32,7 @@ namespace LLama
         /// <param name="instructionPrefix"></param>
         /// <param name="instructionSuffix"></param>
         public InstructExecutor(LLamaContext context, ILogger logger = null!, string instructionPrefix = "\n\n### Instruction:\n\n",
-            string instructionSuffix = "\n\n### Response:\n\n") : base(context, logger)
+            string instructionSuffix = "\n\n### Response:\n\n") : base(context)
         {
             _inp_pfx = Context.Tokenize(instructionPrefix, true);
             _inp_sfx = Context.Tokenize(instructionSuffix, false);
diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index 0f374e09..8247ca10 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -27,7 +27,7 @@ namespace LLama
         /// </summary>
         /// <param name="context"></param>
         /// <param name="logger"></param>
-        public InteractiveExecutor(LLamaContext context, ILogger logger = null!) : base(context, logger)
+        public InteractiveExecutor(LLamaContext context) : base(context)
         {
             _llama_token_newline = NativeApi.llama_token_nl(Context.NativeHandle);
         }
diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs
index 08a78f9e..d1b73c2f 100644
--- a/LLama/LLamaStatelessExecutor.cs
+++ b/LLama/LLamaStatelessExecutor.cs
@@ -36,9 +36,8 @@ namespace LLama
         /// <param name="weights"></param>
         /// <param name="params"></param>
         /// <param name="logger"></param>
-        public StatelessExecutor(LLamaWeights weights, IContextParams @params, ILogger logger = null!)
+        public StatelessExecutor(LLamaWeights weights, IContextParams @params)
         {
-            _logger = logger;
             _weights = weights;
             _params = @params;
 
diff --git a/LLama/LLamaWeights.cs b/LLama/LLamaWeights.cs
index 76d46d25..5dc2024d 100644
--- a/LLama/LLamaWeights.cs
+++ b/LLama/LLamaWeights.cs
@@ -81,11 +81,10 @@ namespace LLama
         /// Create a llama_context using this model
         /// </summary>
         /// <param name="params"></param>
-        /// <param name="logger"></param>
         /// <returns></returns>
-        public LLamaContext CreateContext(IContextParams @params, ILogger logger = default!)
+        public LLamaContext CreateContext(IContextParams @params)
         {
-            return new LLamaContext(this, @params, logger);
+            return new LLamaContext(this, @params);
         }
     }
 }

From 952e77f97b5cba997cd3439e44ad247a0e83dab8 Mon Sep 17 00:00:00 2001
From: sa_ddam213 <sa_ddam213@live.com>
Date: Fri, 20 Oct 2023 08:33:27 +1300
Subject: [PATCH 7/7] Remove old parameter

---
 LLama.Web/Models/LLamaModel.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LLama.Web/Models/LLamaModel.cs b/LLama.Web/Models/LLamaModel.cs
index 5aedc5f5..61341d42 100644
--- a/LLama.Web/Models/LLamaModel.cs
+++ b/LLama.Web/Models/LLamaModel.cs
@@ -58,7 +58,7 @@ namespace LLama.Web.Models
             if (_config.MaxInstances > -1 && ContextCount >= _config.MaxInstances)
                 throw new Exception($"Maximum model instances reached");
 
-            context = _weights.CreateContext(_config, _llamaLogger);
+            context = _weights.CreateContext(_config);
             if (_contexts.TryAdd(contextName, context))
                 return Task.FromResult(context);