diff --git a/LLama.Web/Async/AsyncLock.cs b/LLama.Web/Async/AsyncLock.cs
new file mode 100644
index 00000000..09ccb0f7
--- /dev/null
+++ b/LLama.Web/Async/AsyncLock.cs
@@ -0,0 +1,55 @@
+﻿namespace LLama.Web.Async
+{
+    /// <summary>
+    /// Asynchronous mutual-exclusion lock designed to be held through a <c>using</c> statement.
+    /// </summary>
+    public sealed class AsyncLock
+    {
+        private readonly SemaphoreSlim _semaphore;
+        private readonly Task<IDisposable> _releaser;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="AsyncLock"/> class with the lock initially free.
+        /// </summary>
+        public AsyncLock()
+        {
+            _semaphore = new SemaphoreSlim(1, 1);
+            _releaser = Task.FromResult<IDisposable>(new Releaser(this));
+        }
+
+        /// <summary>
+        /// Asynchronously acquires the lock.
+        /// </summary>
+        /// <returns>A task whose result releases the lock when disposed.</returns>
+        public Task<IDisposable> LockAsync()
+        {
+            var pending = _semaphore.WaitAsync();
+
+            // Slow path: the semaphore wait is still pending, so hand out the releaser once it completes.
+            if (!pending.IsCompleted)
+            {
+                return pending.ContinueWith(
+                    (_, state) => (IDisposable)state,
+                    _releaser.Result,
+                    CancellationToken.None,
+                    TaskContinuationOptions.ExecuteSynchronously,
+                    TaskScheduler.Default);
+            }
+
+            // Fast path: the wait already completed, reuse the cached completed task.
+            return _releaser;
+        }
+
+        /// <summary>
+        /// Disposable handle that releases the owning lock's semaphore on dispose.
+        /// </summary>
+        /// <seealso cref="IDisposable" />
+        private sealed class Releaser : IDisposable
+        {
+            private readonly AsyncLock _owner;
+            internal Releaser(AsyncLock owner) => _owner = owner;
+
+            public void Dispose() => _owner._semaphore.Release();
+        }
+    }
+}
diff --git a/LLama.Web/Common/LLamaOptions.cs b/LLama.Web/Common/LLamaOptions.cs
index 1ac0d829..a64b9635 100644
--- a/LLama.Web/Common/LLamaOptions.cs
+++ b/LLama.Web/Common/LLamaOptions.cs
@@ -2,6 +2,7 @@
 {
     public class LLamaOptions
     {
+        public ModelLoadType ModelLoadType { get; set; }
         public List<ModelOptions> Models { get; set; }
         public List<PromptOptions> Prompts { get; set; } = new List<PromptOptions>();
         public List<ParameterOptions> Parameters { get; set; } = new List<ParameterOptions>();
diff --git a/LLama.Web/Common/ModelLoadType.cs b/LLama.Web/Common/ModelLoadType.cs
new file mode 100644
index 00000000..9e1c77b7
--- /dev/null
+++ b/LLama.Web/Common/ModelLoadType.cs
@@ -0,0 +1,30 @@
+﻿namespace LLama.Web.Common
+{
+    /// <summary>
+    /// Selects how models are loaded into and cached in memory
+    /// </summary>
+    public enum ModelLoadType
+    {
+
+        /// <summary>
+        /// Only one model is held in memory at a time; any other loaded model is unloaded before a new one is loaded
+        /// </summary>
+        Single = 0,
+
+        /// <summary>
+        /// Multiple models may be held in memory at once; use the model configuration entries to split the hardware resources between them
+        /// </summary>
+        Multiple = 1,
+
+        /// <summary>
+        /// Like Single, but the first model in the appsettings.json list is preloaded into memory at app startup
+        /// </summary>
+        PreloadSingle = 2,
+
+
+        /// <summary>
+        /// Like Multiple, but all models in the appsettings.json list are preloaded into memory at app startup
+        /// </summary>
+        PreloadMultiple = 3,
+    }
+}
diff --git a/LLama.Web/Common/ModelOptions.cs b/LLama.Web/Common/ModelOptions.cs
index f06757e3..c6cf0988 100644
--- a/LLama.Web/Common/ModelOptions.cs
+++ b/LLama.Web/Common/ModelOptions.cs
@@ -3,105 +3,123 @@ using LLama.Abstractions;
 
 namespace LLama.Web.Common
 {
-    public class ModelOptions
-        : IModelParams
+    public class ModelOptions : IModelParams
     {
-      
+        /// <summary>
+        /// Model friendly name
+        /// </summary>
         public string Name { get; set; }
+
+        /// <summary>
+        /// Max context instances allowed per model
+        /// </summary>
         public int MaxInstances { get; set; }
 
+        /// <summary>
+        /// Model context size (n_ctx)
+        /// </summary>
+        public int ContextSize { get; set; } = 512;
+
+        /// <summary>
+        /// the GPU that is used for scratch and small tensors
+        /// </summary>
+        public int MainGpu { get; set; } = 0;
+
+        /// <summary>
+        /// if true, reduce VRAM usage at the cost of performance
+        /// </summary>
+        public bool LowVram { get; set; } = false;
+
+        /// <summary>
+        /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
+        /// </summary>
+        public int GpuLayerCount { get; set; } = 20;
+
+        /// <summary>
+        /// Seed for the random number generator (seed)
+        /// </summary>
+        public int Seed { get; set; } = 1686349486;
+
+        /// <summary>
+        /// Use f16 instead of f32 for memory kv (memory_f16)
+        /// </summary>
+        public bool UseFp16Memory { get; set; } = true;
+
+        /// <summary>
+        /// Use mmap for faster loads (use_mmap)
+        /// </summary>
+        public bool UseMemorymap { get; set; } = true;
+
+        /// <summary>
+        /// Use mlock to keep model in memory (use_mlock)
+        /// </summary>
+        public bool UseMemoryLock { get; set; } = false;
+
+        /// <summary>
+        /// Compute perplexity over the prompt (perplexity)
+        /// </summary>
+        public bool Perplexity { get; set; } = false;
+
+        /// <summary>
+        /// Model path (model)
+        /// </summary>
+        public string ModelPath { get; set; }
+
+        /// <summary>
+        /// model alias
+        /// </summary>
+        public string ModelAlias { get; set; } = "unknown";
 
-		/// <summary>
-		/// Model context size (n_ctx)
-		/// </summary>
-		public int ContextSize { get; set; } = 512;
-		/// <summary>
-		/// the GPU that is used for scratch and small tensors
-		/// </summary>
-		public int MainGpu { get; set; } = 0;
-		/// <summary>
-		/// if true, reduce VRAM usage at the cost of performance
-		/// </summary>
-		public bool LowVram { get; set; } = false;
-		/// <summary>
-		/// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
-		/// </summary>
-		public int GpuLayerCount { get; set; } = 20;
-		/// <summary>
-		/// Seed for the random number generator (seed)
-		/// </summary>
-		public int Seed { get; set; } = 1686349486;
-		/// <summary>
-		/// Use f16 instead of f32 for memory kv (memory_f16)
-		/// </summary>
-		public bool UseFp16Memory { get; set; } = true;
-		/// <summary>
-		/// Use mmap for faster loads (use_mmap)
-		/// </summary>
-		public bool UseMemorymap { get; set; } = true;
-		/// <summary>
-		/// Use mlock to keep model in memory (use_mlock)
-		/// </summary>
-		public bool UseMemoryLock { get; set; } = false;
-		/// <summary>
-		/// Compute perplexity over the prompt (perplexity)
-		/// </summary>
-		public bool Perplexity { get; set; } = false;
-		/// <summary>
-		/// Model path (model)
-		/// </summary>
-		public string ModelPath { get; set; }
-		/// <summary>
-		/// model alias
-		/// </summary>
-		public string ModelAlias { get; set; } = "unknown";
-		/// <summary>
-		/// lora adapter path (lora_adapter)
-		/// </summary>
-		public string LoraAdapter { get; set; } = string.Empty;
-		/// <summary>
-		/// base model path for the lora adapter (lora_base)
-		/// </summary>
-		public string LoraBase { get; set; } = string.Empty;
-		/// <summary>
-		/// Number of threads (-1 = autodetect) (n_threads)
-		/// </summary>
-		public int Threads { get; set; } = Math.Max(Environment.ProcessorCount / 2, 1);
-		/// <summary>
-		/// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
-		/// </summary>
-		public int BatchSize { get; set; } = 512;
-
-		/// <summary>
-		/// Whether to convert eos to newline during the inference.
-		/// </summary>
-		public bool ConvertEosToNewLine { get; set; } = false;
-
-		/// <summary>
-		/// Whether to use embedding mode. (embedding) Note that if this is set to true, 
-		/// The LLamaModel won't produce text response anymore.
-		/// </summary>
-		public bool EmbeddingMode { get; set; } = false;
-
-		/// <summary>
-		/// how split tensors should be distributed across GPUs
-		/// </summary>
-		public float[] TensorSplits { get; set; }
-
-		/// <summary>
-		/// RoPE base frequency
-		/// </summary>
-		public float RopeFrequencyBase { get; set; } = 10000.0f;
-
-		/// <summary>
-		/// RoPE frequency scaling factor
-		/// </summary>
-		public float RopeFrequencyScale { get; set; } = 1.0f;
-
-		/// <summary>
-		/// Use experimental mul_mat_q kernels
-		/// </summary>
-		public bool MulMatQ { get; set; }
+        /// <summary>
+        /// lora adapter path (lora_adapter)
+        /// </summary>
+        public string LoraAdapter { get; set; } = string.Empty;
+
+        /// <summary>
+        /// base model path for the lora adapter (lora_base)
+        /// </summary>
+        public string LoraBase { get; set; } = string.Empty;
+
+        /// <summary>
+        /// Number of threads (-1 = autodetect) (n_threads)
+        /// </summary>
+        public int Threads { get; set; } = Math.Max(Environment.ProcessorCount / 2, 1);
+
+        /// <summary>
+        /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
+        /// </summary>
+        public int BatchSize { get; set; } = 512;
+
+        /// <summary>
+        /// Whether to convert eos to newline during the inference.
+        /// </summary>
+        public bool ConvertEosToNewLine { get; set; } = false;
+
+        /// <summary>
+        /// Whether to use embedding mode. (embedding) Note that if this is set to true, 
+        /// The LLamaModel won't produce text response anymore.
+        /// </summary>
+        public bool EmbeddingMode { get; set; } = false;
+
+        /// <summary>
+        /// how split tensors should be distributed across GPUs
+        /// </summary>
+        public float[] TensorSplits { get; set; }
+
+        /// <summary>
+        /// RoPE base frequency
+        /// </summary>
+        public float RopeFrequencyBase { get; set; } = 10000.0f;
+
+        /// <summary>
+        /// RoPE frequency scaling factor
+        /// </summary>
+        public float RopeFrequencyScale { get; set; } = 1.0f;
+
+        /// <summary>
+        /// Use experimental mul_mat_q kernels
+        /// </summary>
+        public bool MulMatQ { get; set; }
 
         /// <summary>
         /// The encoding to use for models
diff --git a/LLama.Web/LLamaModel.cs b/LLama.Web/LLamaModel.cs
new file mode 100644
index 00000000..e500ba04
--- /dev/null
+++ b/LLama.Web/LLamaModel.cs
@@ -0,0 +1,106 @@
+﻿using LLama.Abstractions;
+using LLama.Web.Common;
+using System.Collections.Concurrent;
+
+namespace LLama.Web
+{
+    /// <summary>
+    /// Wrapper class for LLamaSharp LLamaWeights that also owns the named contexts created from them
+    /// </summary>
+    /// <seealso cref="System.IDisposable" />
+    public class LLamaModel : IDisposable
+    {
+        private readonly ModelOptions _config;
+        private readonly LLamaWeights _weights;
+        private readonly ConcurrentDictionary<string, LLamaContext> _contexts;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="LLamaModel"/> class, loading the weights from file.
+        /// </summary>
+        /// <param name="modelParams">The model parameters.</param>
+        public LLamaModel(ModelOptions modelParams)
+        {
+            _config = modelParams;
+            _weights = LLamaWeights.LoadFromFile(modelParams);
+            _contexts = new ConcurrentDictionary<string, LLamaContext>();
+        }
+
+        /// <summary>
+        /// Gets the model configuration.
+        /// </summary>
+        public IModelParams ModelParams => _config;
+
+        /// <summary>
+        /// Gets the LLamaWeights
+        /// </summary>
+        public LLamaWeights LLamaWeights => _weights;
+
+        /// <summary>
+        /// Gets the number of contexts currently held by this model.
+        /// </summary>
+        public int ContextCount => _contexts.Count;
+
+        /// <summary>
+        /// Creates a new context session on this model
+        /// </summary>
+        /// <param name="contextName">The unique context identifier</param>
+        /// <returns>The new context, or null when another caller registered the same name concurrently</returns>
+        /// <exception cref="Exception">The context name already exists, or the maximum instance count is reached</exception>
+        public Task<LLamaContext> CreateContext(string contextName)
+        {
+            if (_contexts.TryGetValue(contextName, out var context))
+                throw new Exception($"Context with id {contextName} already exists.");
+
+            if (_config.MaxInstances > -1 && ContextCount >= _config.MaxInstances)
+                throw new Exception($"Maximum model instances reached");
+
+            context = _weights.CreateContext(_config);
+            if (_contexts.TryAdd(contextName, context))
+                return Task.FromResult(context);
+
+            // Lost a race for this name: dispose the context we just created so it is not leaked
+            context?.Dispose();
+            return Task.FromResult<LLamaContext>(null);
+        }
+
+        /// <summary>
+        /// Gets a context belonging to this model
+        /// </summary>
+        /// <param name="contextName">The unique context identifier</param>
+        /// <returns>The context registered under contextName, or null if there is none</returns>
+        public Task<LLamaContext> GetContext(string contextName)
+        {
+            if (_contexts.TryGetValue(contextName, out var context))
+                return Task.FromResult(context);
+
+            return Task.FromResult<LLamaContext>(null);
+        }
+
+        /// <summary>
+        /// Remove a context from this model and dispose it
+        /// </summary>
+        /// <param name="contextName">The unique context identifier</param>
+        /// <returns>true if removed, otherwise false</returns>
+        public Task<bool> RemoveContext(string contextName)
+        {
+            if (!_contexts.TryRemove(contextName, out var context))
+                return Task.FromResult(false);
+
+            context?.Dispose();
+            return Task.FromResult(true);
+        }
+
+        /// <summary>
+        /// Removes and disposes every context held by this model, then disposes the weights.
+        /// </summary>
+        public void Dispose()
+        {
+            foreach (var contextName in _contexts.Keys)
+            {
+                // Remove before disposing so a disposed context can never be handed out by GetContext
+                if (_contexts.TryRemove(contextName, out var context))
+                    context?.Dispose();
+            }
+            _weights.Dispose();
+        }
+    }
+}
diff --git a/LLama.Web/Services/IModelService.cs b/LLama.Web/Services/IModelService.cs
new file mode 100644
index 00000000..0a98f8f4
--- /dev/null
+++ b/LLama.Web/Services/IModelService.cs
@@ -0,0 +1,76 @@
+﻿using LLama.Web.Common;
+
+namespace LLama.Web.Services
+{
+    /// <summary>
+    /// Service for managing language Models
+    /// </summary>
+    public interface IModelService
+    {
+        /// <summary>
+        /// Gets the model with the specified name.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <returns>The loaded model; the ModelService implementation yields null when no model with that name is loaded.</returns>
+        Task<LLamaModel> GetModel(string modelName);
+
+        /// <summary>
+        /// Loads a model from a ModelOptions object.
+        /// </summary>
+        /// <param name="modelOptions">The model configuration.</param>
+        /// <returns>The loaded model.</returns>
+        Task<LLamaModel> LoadModel(ModelOptions modelOptions);
+
+        /// <summary>
+        /// Loads all models found in appsettings.json
+        /// </summary>
+        Task LoadModels();
+
+        /// <summary>
+        /// Unloads the model with the specified name.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        Task UnloadModel(string modelName);
+
+        /// <summary>
+        /// Unloads all models.
+        /// </summary>
+        Task UnloadModels();
+
+        /// <summary>
+        /// Gets a context with the specified identifier
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The context identifier.</param>
+        /// <returns>The context; the ModelService implementation yields null when the model has no context with that identifier.</returns>
+        /// <exception cref="System.Exception">The ModelService implementation throws when the model is not loaded.</exception>
+        Task<LLamaContext> GetContext(string modelName, string contextName);
+
+        /// <summary>
+        /// Removes the context.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The context identifier.</param>
+        /// <returns>true if the context was removed, otherwise false.</returns>
+        /// <exception cref="System.Exception">The ModelService implementation throws when the model is not loaded.</exception>
+        Task<bool> RemoveContext(string modelName, string contextName);
+
+        /// <summary>
+        /// Creates a context.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The context identifier.</param>
+        /// <returns>The created context.</returns>
+        /// <exception cref="System.Exception">The ModelService implementation throws when the model is not loaded.</exception>
+        Task<LLamaContext> CreateContext(string modelName, string contextName);
+
+        /// <summary>
+        /// Gets or creates the model and context.
+        /// This will load a model from disk if not already loaded, and also create the context
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The context identifier.</param>
+        /// <returns>Both loaded Model and Context</returns>
+        Task<(LLamaModel, LLamaContext)> GetOrCreateModelAndContext(string modelName, string contextName);
+    }
+}
\ No newline at end of file
diff --git a/LLama.Web/Services/ModelService.cs b/LLama.Web/Services/ModelService.cs
new file mode 100644
index 00000000..16365a5d
--- /dev/null
+++ b/LLama.Web/Services/ModelService.cs
@@ -0,0 +1,202 @@
+﻿using LLama.Web.Async;
+using LLama.Web.Common;
+using System.Collections.Concurrent;
+
+namespace LLama.Web.Services
+{
+
+    /// <summary>
+    /// Service for handling Models, Weights and Contexts
+    /// </summary>
+    public class ModelService : IModelService
+    {
+        private readonly AsyncLock _modelLock;
+        private readonly AsyncLock _contextLock;
+        private readonly LLamaOptions _configuration;
+        private readonly ConcurrentDictionary<string, LLamaModel> _modelInstances;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="ModelService"/> class.
+        /// </summary>
+        /// <param name="configuration">The LLama options holding the model list and the model load type.</param>
+        public ModelService(LLamaOptions configuration)
+        {
+            _modelLock = new AsyncLock();
+            _contextLock = new AsyncLock();
+            _configuration = configuration;
+            _modelInstances = new ConcurrentDictionary<string, LLamaModel>();
+        }
+
+        /// <summary>
+        /// Loads a model with the provided configuration.
+        /// </summary>
+        /// <param name="modelOptions">The model configuration.</param>
+        /// <returns>The already loaded model with the same name, or the newly loaded one</returns>
+        public async Task<LLamaModel> LoadModel(ModelOptions modelOptions)
+        {
+            if (_modelInstances.TryGetValue(modelOptions.Name, out var existingModel))
+                return existingModel;
+
+            using (await _modelLock.LockAsync())
+            {
+                // Check again now that the lock is held, another caller may have loaded it while we waited
+                if (_modelInstances.TryGetValue(modelOptions.Name, out var model))
+                    return model;
+
+                // If in single mode unload any other models
+                if (_configuration.ModelLoadType == ModelLoadType.Single
+                 || _configuration.ModelLoadType == ModelLoadType.PreloadSingle)
+                    await UnloadModels();
+
+                model = new LLamaModel(modelOptions);
+                _modelInstances.TryAdd(modelOptions.Name, model);
+                return model;
+            }
+        }
+
+        /// <summary>
+        /// Preloads the configured models. Does nothing unless the load type is PreloadSingle or PreloadMultiple.
+        /// </summary>
+        public async Task LoadModels()
+        {
+            if (_configuration.ModelLoadType == ModelLoadType.Single
+             || _configuration.ModelLoadType == ModelLoadType.Multiple)
+                return;
+
+            // Models has no default value, so it is null when no models were configured
+            if (_configuration.Models is null)
+                return;
+
+            foreach (var modelConfig in _configuration.Models)
+            {
+                await LoadModel(modelConfig);
+
+                // Only preload the first model if in PreloadSingle mode
+                if (_configuration.ModelLoadType == ModelLoadType.PreloadSingle)
+                    break;
+            }
+        }
+
+        /// <summary>
+        /// Unloads the model.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <returns>A completed task; the model, if it was loaded, has been removed and disposed</returns>
+        public Task UnloadModel(string modelName)
+        {
+            if (_modelInstances.TryRemove(modelName, out var model))
+            {
+                model?.Dispose();
+                return Task.FromResult(true);
+            }
+            return Task.FromResult(false);
+        }
+
+        /// <summary>
+        /// Unloads all models.
+        /// </summary>
+        public async Task UnloadModels()
+        {
+            foreach (var modelName in _modelInstances.Keys)
+            {
+                await UnloadModel(modelName);
+            }
+        }
+
+        /// <summary>
+        /// Gets a model by name.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <returns>The loaded model, or null if no model with that name is loaded</returns>
+        public Task<LLamaModel> GetModel(string modelName)
+        {
+            _modelInstances.TryGetValue(modelName, out var model);
+            return Task.FromResult(model);
+        }
+
+        /// <summary>
+        /// Gets a context from the specified model.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The contextName.</param>
+        /// <returns>The context, or null if the model has no context with that name</returns>
+        /// <exception cref="System.Exception">Model not found</exception>
+        public async Task<LLamaContext> GetContext(string modelName, string contextName)
+        {
+            if (!_modelInstances.TryGetValue(modelName, out var model))
+                throw new Exception("Model not found");
+
+            return await model.GetContext(contextName);
+        }
+
+        /// <summary>
+        /// Creates a context on the specified model.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The contextName.</param>
+        /// <returns>The created context</returns>
+        /// <exception cref="System.Exception">Model not found</exception>
+        public async Task<LLamaContext> CreateContext(string modelName, string contextName)
+        {
+            if (!_modelInstances.TryGetValue(modelName, out var model))
+                throw new Exception("Model not found");
+
+            using (await _contextLock.LockAsync())
+            {
+                return await model.CreateContext(contextName);
+            }
+        }
+
+        /// <summary>
+        /// Removes a context from the specified model.
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The contextName.</param>
+        /// <returns>true if the context was removed, otherwise false</returns>
+        /// <exception cref="System.Exception">Model not found</exception>
+        public async Task<bool> RemoveContext(string modelName, string contextName)
+        {
+            if (!_modelInstances.TryGetValue(modelName, out var model))
+                throw new Exception("Model not found");
+
+            using (await _contextLock.LockAsync())
+            {
+                return await model.RemoveContext(contextName);
+            }
+        }
+
+        /// <summary>
+        /// Loads, Gets, Creates a Model and a Context
+        /// </summary>
+        /// <param name="modelName">Name of the model.</param>
+        /// <param name="contextName">The contextName.</param>
+        /// <returns>The model together with its existing or newly created context</returns>
+        /// <exception cref="System.Exception">Model option '{modelName}' not found</exception>
+        public async Task<(LLamaModel, LLamaContext)> GetOrCreateModelAndContext(string modelName, string contextName)
+        {
+            if (!_modelInstances.TryGetValue(modelName, out var model))
+            {
+                // Get model configuration (Models is null when nothing was configured)
+                var modelConfig = _configuration.Models?.FirstOrDefault(x => x.Name == modelName);
+                if (modelConfig is null)
+                    throw new Exception($"Model option '{modelName}' not found");
+                // Load Model
+                model = await LoadModel(modelConfig);
+            }
+
+            return (model, await GetOrCreateContext(model, contextName));
+        }
+
+        /// <summary>
+        /// Gets the named context from the model, creating it when missing.
+        /// Lookup and creation run under the context lock so concurrent callers of this service cannot both create it.
+        /// </summary>
+        private async Task<LLamaContext> GetOrCreateContext(LLamaModel model, string contextName)
+        {
+            using (await _contextLock.LockAsync())
+            {
+                return await model.GetContext(contextName) ?? await model.CreateContext(contextName);
+            }
+        }
+    }
+}