diff --git a/LLama/LLamaModel.cs b/LLama/LLamaModel.cs
index 830cbb24..84e545c3 100644
--- a/LLama/LLamaModel.cs
+++ b/LLama/LLamaModel.cs
@@ -36,9 +36,6 @@ namespace LLama
         int _n_session_consumed;
         List<llama_token> _embed;
 
-        // params related to chat API only
-        bool _first_time_chat = true;
-
         public string Name { get; set; }
 
         public SafeLLamaContextHandle NativeHandle => _ctx;
@@ -53,11 +50,12 @@
             bool memory_f16 = true, bool random_prompt = false, bool use_color = false, bool interactive = false,
             bool embedding = false, bool interactive_first = false, bool prompt_cache_all = false, bool instruct = false,
             bool penalize_nl = true, bool perplexity = false, bool use_mmap = true, bool use_mlock = false, bool mem_test = false,
-            bool verbose_prompt = false) : this(new LLamaParams(seed, n_threads, n_predict, n_parts, n_ctx, n_batch,
+            bool verbose_prompt = false, string encoding = "UTF-8") : this(new LLamaParams(seed, n_threads, n_predict, n_parts, n_ctx, n_batch,
                 n_keep, n_gpu_layers, logit_bias, top_k, top_p, tfs_z, typical_p, temp, repeat_penalty, repeat_last_n,
                 frequency_penalty, presence_penalty, mirostat, mirostat_tau, mirostat_eta, model_path, prompt, path_session,
                 input_prefix, input_suffix, antiprompt, lora_adapter, lora_base, memory_f16, random_prompt, use_color, interactive, embedding,
-                interactive_first, prompt_cache_all, instruct, penalize_nl, perplexity, use_mmap, use_mlock, mem_test, verbose_prompt), model_name, echo_input, verbose)
+                interactive_first, prompt_cache_all, instruct, penalize_nl, perplexity, use_mmap, use_mlock, mem_test, verbose_prompt),
+                model_name, echo_input, verbose, encoding)
         {
 
         }
@@ -293,6 +291,25 @@
             return Call(text, encoding);
         }
 
+        public void SaveState(string filename)
+        {
+            var stateSize = NativeApi.llama_get_state_size(_ctx);
+            byte[] stateMemory = new byte[stateSize];
+            NativeApi.llama_copy_state_data(_ctx, stateMemory);
+            File.WriteAllBytes(filename, stateMemory);
+        }
+
+        public void LoadState(string filename)
+        {
+            var stateMemory = File.ReadAllBytes(filename);
+            if(stateMemory.Length != (int)NativeApi.llama_get_state_size(_ctx))
+            {
+                throw new RuntimeError("Failed to validate state size.");
+            }
+            NativeApi.llama_set_state_data(_ctx, stateMemory);
+
+        }
+
         public IEnumerable<string> Call(string text, string encoding = "UTF-8")
         {
             _is_antiprompt = false;
@@ -507,9 +524,6 @@
                 }
                 else
                 {
-                    // Assuming that the necessary variables have been defined and initialized,
-                    // the C# equivalent code could be:
-
                     while (_embed_inp.Count > _n_consumed)
                     {
                         _embed.Add(_embed_inp[_n_consumed]);
diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj
index 5e305942..fd1a31c2 100644
--- a/LLama/LLamaSharp.csproj
+++ b/LLama/LLamaSharp.csproj
@@ -8,7 +8,7 @@
     <Platforms>AnyCPU;x64</Platforms>
     <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
 
-    <Version>0.2.3</Version>
+    <Version>0.2.4</Version>
     <Authors>Yaohui Liu, Haiping Chen</Authors>
     <Company>SciSharp STACK</Company>
     <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
@@ -21,7 +21,7 @@
     <Description>
      The .NET binding of LLama.cpp, providing APIs to run the model and deploy it on Web.
     </Description>
-    <PackageReleaseNotes>LLama 0.2.3 mainly fixed some BUGs of model inference.</PackageReleaseNotes>
+    <PackageReleaseNotes>LLama 0.2.4 mainly supports loading and saving session state.</PackageReleaseNotes>
     <PackageLicenseExpression>MIT</PackageLicenseExpression>
     <PackageOutputPath>packages</PackageOutputPath>
 
diff --git a/LLama/Native/GgmlInitParams.cs b/LLama/Native/GgmlInitParams.cs
new file mode 100644
index 00000000..834ceab9
--- /dev/null
+++ b/LLama/Native/GgmlInitParams.cs
@@ -0,0 +1,15 @@
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace LLama.Native
+{
+    internal struct GgmlInitParams
+    {
+        public ulong mem_size;
+        public IntPtr mem_buffer;
+        [MarshalAs(UnmanagedType.I1)]
+        public bool no_alloc;
+    }
+}
diff --git a/LLama/Native/NativeApi.Quantize.cs b/LLama/Native/NativeApi.Quantize.cs
index 75e58096..31136fa3 100644
--- a/LLama/Native/NativeApi.Quantize.cs
+++ b/LLama/Native/NativeApi.Quantize.cs
@@ -5,7 +5,7 @@ using System.Text;
 
 namespace LLama.Native
 {
-    internal partial class NativeApi
+    public partial class NativeApi
     {
         /// <summary>
         /// Returns 0 on success
diff --git a/LLama/Native/NativeApi.Sampling.cs b/LLama/Native/NativeApi.Sampling.cs
index aa6a1d45..83c52320 100644
--- a/LLama/Native/NativeApi.Sampling.cs
+++ b/LLama/Native/NativeApi.Sampling.cs
@@ -6,7 +6,7 @@ using System.Text;
 namespace LLama.Native
 {
     using llama_token = Int32;
-    internal unsafe partial class NativeApi
+    public unsafe partial class NativeApi
     {
         /// <summary>
         /// Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix.
diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs
index 66986a3b..dcb6b19b 100644
--- a/LLama/Native/NativeApi.cs
+++ b/LLama/Native/NativeApi.cs
@@ -8,7 +8,7 @@ using LLama.Exceptions;
 namespace LLama.Native
 {
     using llama_token = Int32;
-    internal unsafe partial class NativeApi
+    public unsafe partial class NativeApi
     {
         static NativeApi()
         {
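
Note: a minimal usage sketch of the new SaveState/LoadState session-state API added in this diff. It is illustrative, not part of the commit: the LLamaParams arguments and file names are placeholders, and only SaveState, LoadState, Call, and NativeHandle come from the code above.

    using System;
    using LLama;

    // Load a model (placeholder path; parameters are illustrative).
    LLamaModel model = new(new LLamaParams(model: "<path-to-ggml-model>.bin", n_ctx: 512));

    // Evaluate a prompt, then snapshot the whole context state to disk.
    foreach (var output in model.Call("The quick brown fox"))
    {
        Console.Write(output);
    }
    model.SaveState("session.bin");

    // Later (same model and context size): restore the snapshot instead of
    // re-evaluating the prompt. LoadState throws RuntimeError when the file
    // size does not match llama_get_state_size for the current context.
    model.LoadState("session.bin");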
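Because NativeApi is now public, callers can also keep state snapshots in memory instead of going through files. A sketch mirroring SaveState minus the file I/O; the helper name SnapshotState is hypothetical, but the native calls and the NativeHandle property appear in the diff above.

    using LLama.Native;

    // Copy the context state into a managed buffer via the public bindings.
    static byte[] SnapshotState(SafeLLamaContextHandle ctx)
    {
        var size = NativeApi.llama_get_state_size(ctx);
        var buffer = new byte[size];
        NativeApi.llama_copy_state_data(ctx, buffer);
        return buffer;
    }

    // Usage, given a LLamaModel instance:
    // var snapshot = SnapshotState(model.NativeHandle);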