From 13a312b4ecb7f1ca65650b2b275b868271d4ec88 Mon Sep 17 00:00:00 2001 From: xbotter Date: Mon, 11 Dec 2023 19:39:01 +0800 Subject: [PATCH 1/4] update sk to 1.0.0-rc3 & km to 0.18 --- LLama.Examples/Examples/KernelMemory.cs | 6 +++ LLama.Examples/Examples/SemanticKernelChat.cs | 4 +- .../Examples/SemanticKernelPrompt.cs | 14 ++--- LLama.Examples/LLama.Examples.csproj | 24 ++------- LLama.KernelMemory/BuilderExtensions.cs | 36 +++++-------- .../LLamaSharp.KernelMemory.csproj | 4 +- ...cs => LLamaSharpTextEmbeddingGenerator.cs} | 31 +++++++---- ...neration.cs => LlamaSharpTextGenerator.cs} | 17 +++++-- .../ChatCompletion/ChatRequestSettings.cs | 4 +- .../LLamaSharpChatCompletion.cs | 51 ++++++++++++------- .../ChatCompletion/LLamaSharpChatMessage.cs | 14 ----- .../ChatCompletion/LLamaSharpChatResult.cs | 44 ---------------- .../LLamaSharp.SemanticKernel.csproj | 3 +- .../LLamaSharpTextCompletion.cs | 37 +++++++++----- .../TextCompletion/LLamaTextResult.cs | 37 -------------- .../LLamaSharpEmbeddingGeneration.cs | 7 +-- .../ChatRequestSettingsTests.cs | 6 +-- LLama/LLamaEmbedder.cs | 8 +++ 18 files changed, 146 insertions(+), 201 deletions(-) rename LLama.KernelMemory/{LLamaSharpTextEmbeddingGeneration.cs => LLamaSharpTextEmbeddingGenerator.cs} (72%) rename LLama.KernelMemory/{LlamaSharpTextGeneration.cs => LlamaSharpTextGenerator.cs} (86%) delete mode 100644 LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs delete mode 100644 LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs delete mode 100644 LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs diff --git a/LLama.Examples/Examples/KernelMemory.cs b/LLama.Examples/Examples/KernelMemory.cs index 0aea3d7a..a204600b 100644 --- a/LLama.Examples/Examples/KernelMemory.cs +++ b/LLama.Examples/Examples/KernelMemory.cs @@ -16,6 +16,11 @@ namespace LLama.Examples.Examples Console.WriteLine("Example from: https://github.com/microsoft/kernel-memory/blob/main/examples/101-using-core-nuget/Program.cs"); Console.Write("Please input your model path: "); var modelPath = Console.ReadLine(); + var searchClientConfig = new SearchClientConfig + { + MaxMatchesCount = 1, + AnswerTokens = 100, + }; var memory = new KernelMemoryBuilder() .WithLLamaSharpDefaults(new LLamaSharpConfig(modelPath) { @@ -24,6 +29,7 @@ namespace LLama.Examples.Examples AntiPrompts = new List { "\n\n" } } }) + .WithSearchClientConfig(searchClientConfig) .With(new TextPartitioningOptions { MaxTokensPerParagraph = 300, diff --git a/LLama.Examples/Examples/SemanticKernelChat.cs b/LLama.Examples/Examples/SemanticKernelChat.cs index 39870f1b..a9d5be54 100644 --- a/LLama.Examples/Examples/SemanticKernelChat.cs +++ b/LLama.Examples/Examples/SemanticKernelChat.cs @@ -29,7 +29,7 @@ namespace LLama.Examples.Examples await MessageOutputAsync(chatHistory); // First bot assistant message - string reply = await chatGPT.GenerateMessageAsync(chatHistory); + string reply = await chatGPT.GetChatMessageContentAsync(chatHistory); chatHistory.AddAssistantMessage(reply); await MessageOutputAsync(chatHistory); @@ -38,7 +38,7 @@ namespace LLama.Examples.Examples await MessageOutputAsync(chatHistory); // Second bot assistant message - reply = await chatGPT.GenerateMessageAsync(chatHistory); + reply = await chatGPT.GetChatMessageContentAsync(chatHistory); chatHistory.AddAssistantMessage(reply); await MessageOutputAsync(chatHistory); } diff --git a/LLama.Examples/Examples/SemanticKernelPrompt.cs b/LLama.Examples/Examples/SemanticKernelPrompt.cs index c4974d67..21cb55de 100644 --- 
a/LLama.Examples/Examples/SemanticKernelPrompt.cs +++ b/LLama.Examples/Examples/SemanticKernelPrompt.cs @@ -2,8 +2,9 @@ using LLama.Common; using LLamaSharp.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.AI.TextCompletion; using LLamaSharp.SemanticKernel.TextCompletion; +using Microsoft.SemanticKernel.AI.TextGeneration; +using Microsoft.Extensions.DependencyInjection; namespace LLama.Examples.Examples { @@ -21,7 +22,7 @@ namespace LLama.Examples.Examples var ex = new StatelessExecutor(model, parameters); var builder = new KernelBuilder(); - builder.WithAIService("local-llama", new LLamaSharpTextCompletion(ex), true); + builder.Services.AddKeyedSingleton("local-llama", new LLamaSharpTextCompletion(ex)); var kernel = builder.Build(); @@ -29,8 +30,8 @@ namespace LLama.Examples.Examples One line TLDR with the fewest words."; - ChatRequestSettings settings = new() {MaxTokens = 100}; - var summarize = kernel.CreateSemanticFunction(prompt, requestSettings: settings); + ChatRequestSettings settings = new() { MaxTokens = 100 }; + var summarize = kernel.CreateFunctionFromPrompt(prompt, settings); string text1 = @" 1st Law of Thermodynamics - Energy cannot be created or destroyed. @@ -42,10 +43,9 @@ One line TLDR with the fewest words."; 2. The acceleration of an object depends on the mass of the object and the amount of force applied. 3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite on the first."; - Console.WriteLine((await kernel.RunAsync(text1, summarize)).GetValue()); + Console.WriteLine((await kernel.InvokeAsync(summarize,new KernelArguments(text1))).GetValue()); - Console.WriteLine((await kernel.RunAsync(text2, summarize)).GetValue()); + Console.WriteLine((await kernel.InvokeAsync(summarize, new KernelArguments(text2))).GetValue()); } } } - \ No newline at end of file diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj index d158f05f..9e4f17ab 100644 --- a/LLama.Examples/LLama.Examples.csproj +++ b/LLama.Examples/LLama.Examples.csproj @@ -1,4 +1,4 @@ - + Exe @@ -9,28 +9,14 @@ true true - - - - 1701;1702;8604 - - - - 1701;1702;8604 - - - - 1701;1702;8604 - - - - 1701;1702;8604 + 12 + 1701;1702;8604;SKEXP0001;SKEXP0052;SKEXP0003 - - + + diff --git a/LLama.KernelMemory/BuilderExtensions.cs b/LLama.KernelMemory/BuilderExtensions.cs index 5f476ce8..7afac4bb 100644 --- a/LLama.KernelMemory/BuilderExtensions.cs +++ b/LLama.KernelMemory/BuilderExtensions.cs @@ -17,19 +17,6 @@ namespace LLamaSharp.KernelMemory public static class BuilderExtensions { - private static IKernelMemoryBuilder WithCustomEmbeddingGeneration(this IKernelMemoryBuilder builder, ITextEmbeddingGeneration embeddingGeneration) - { - builder.AddSingleton(embeddingGeneration); - builder.AddIngestionEmbeddingGenerator(embeddingGeneration); - return builder; - } - - private static IKernelMemoryBuilder WithCustomTextGeneration(this IKernelMemoryBuilder builder, ITextGeneration textGeneration) - { - builder.AddSingleton(textGeneration); - return builder; - } - /// /// Adds LLamaSharpTextEmbeddingGeneration to the KernelMemoryBuilder. /// @@ -38,7 +25,9 @@ namespace LLamaSharp.KernelMemory /// The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration added. 
public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpConfig config) { - builder.WithCustomEmbeddingGeneration(new LLamaSharpTextEmbeddingGeneration(config)); + var generator = new LLamaSharpTextEmbeddingGenerator(config); + builder.AddSingleton(generator); + builder.AddIngestionEmbeddingGenerator(generator); return builder; } @@ -46,11 +35,12 @@ namespace LLamaSharp.KernelMemory /// Adds LLamaSharpTextEmbeddingGeneration to the KernelMemoryBuilder. /// /// The KernelMemoryBuilder instance. - /// The LLamaSharpTextEmbeddingGeneration instance. + /// The LLamaSharpTextEmbeddingGeneration instance. /// The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration added. - public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpTextEmbeddingGeneration textEmbeddingGeneration) + public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpTextEmbeddingGenerator textEmbeddingGenerator) { - builder.WithCustomEmbeddingGeneration(textEmbeddingGeneration); + builder.AddSingleton(textEmbeddingGenerator); + builder.AddIngestionEmbeddingGenerator(textEmbeddingGenerator); return builder; } @@ -62,7 +52,7 @@ namespace LLamaSharp.KernelMemory /// The KernelMemoryBuilder instance with LLamaSharpTextGeneration added. public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LLamaSharpConfig config) { - builder.WithCustomTextGeneration(new LlamaSharpTextGeneration(config)); + builder.AddSingleton(new LlamaSharpTextGenerator(config)); return builder; } @@ -70,11 +60,11 @@ namespace LLamaSharp.KernelMemory /// Adds LLamaSharpTextGeneration to the KernelMemoryBuilder. /// /// The KernelMemoryBuilder instance. - /// The LlamaSharpTextGeneration instance. + /// The LlamaSharpTextGeneration instance. /// The KernelMemoryBuilder instance with LLamaSharpTextGeneration added. 
- public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LlamaSharpTextGeneration textGeneration) + public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LlamaSharpTextGenerator textGenerator) { - builder.WithCustomTextGeneration(textGeneration); + builder.AddSingleton(textGenerator); return builder; } @@ -96,8 +86,8 @@ namespace LLamaSharp.KernelMemory var context = weights.CreateContext(parameters); var executor = new StatelessExecutor(weights, parameters); var embedder = new LLamaEmbedder(weights, parameters); - builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGeneration(embedder)); - builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGeneration(weights, context, executor, config?.DefaultInferenceParams)); + builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGenerator(embedder)); + builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGenerator(weights, context, executor, config?.DefaultInferenceParams)); return builder; } } diff --git a/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj b/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj index 78d4712b..bf3280a3 100644 --- a/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj +++ b/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj @@ -4,8 +4,6 @@ net6.0;net7.0 enable enable - - 0.7.1 0.8.0 Xbotter SciSharp STACK @@ -29,7 +27,7 @@ - + diff --git a/LLama.KernelMemory/LLamaSharpTextEmbeddingGeneration.cs b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs similarity index 72% rename from LLama.KernelMemory/LLamaSharpTextEmbeddingGeneration.cs rename to LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs index 4421ed8a..a00c5352 100644 --- a/LLama.KernelMemory/LLamaSharpTextEmbeddingGeneration.cs +++ b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs @@ -1,6 +1,8 @@ using LLama; using LLama.Abstractions; using LLama.Common; +using Microsoft.KernelMemory; +using Microsoft.KernelMemory.AI; using Microsoft.SemanticKernel.AI.Embeddings; using System; using System.Collections.Generic; @@ -13,22 +15,23 @@ namespace LLamaSharp.KernelMemory /// /// Provides text embedding generation for LLamaSharp. /// - public class LLamaSharpTextEmbeddingGeneration : ITextEmbeddingGeneration, IDisposable + public class LLamaSharpTextEmbeddingGenerator + : ITextEmbeddingGenerator, IDisposable { private readonly LLamaSharpConfig? _config; private readonly LLamaWeights? _weights; private readonly LLamaEmbedder _embedder; private bool _ownsEmbedder = false; private bool _ownsWeights = false; - private readonly Dictionary _attributes = new(); - public IReadOnlyDictionary Attributes => this._attributes; + /// + public int MaxTokens => (int?)_config?.ContextSize ?? 2048; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// The configuration for LLamaSharp. - public LLamaSharpTextEmbeddingGeneration(LLamaSharpConfig config) + public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config) { this._config = config; var @params = new ModelParams(_config.ModelPath); @@ -39,11 +42,11 @@ namespace LLamaSharp.KernelMemory } /// - /// Initializes a new instance of the class from reused weights. + /// Initializes a new instance of the class from reused weights. /// /// The configuration for LLamaSharp. /// A LLamaWeights object. 
- public LLamaSharpTextEmbeddingGeneration(LLamaSharpConfig config, LLamaWeights weights) + public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights weights) { this._config = config; var @params = new ModelParams(_config.ModelPath); @@ -53,10 +56,10 @@ namespace LLamaSharp.KernelMemory } /// - /// Initializes a new instance of the class from reused embedder. + /// Initializes a new instance of the class from reused embedder. /// /// A LLamaEmbedder object. - public LLamaSharpTextEmbeddingGeneration(LLamaEmbedder embedder) + public LLamaSharpTextEmbeddingGenerator(LLamaEmbedder embedder) { this._config = null; this._weights = null; @@ -89,5 +92,15 @@ namespace LLamaSharp.KernelMemory return Task.FromResult(results); } + + /// + public Task GenerateEmbeddingAsync(string text, CancellationToken cancellationToken = default) + { + var embeddings = _embedder.GetEmbeddings(text); + return Task.FromResult(new Embedding(embeddings)); + } + + /// + public int CountTokens(string text) => _embedder.Tokenize(text).Length; } } diff --git a/LLama.KernelMemory/LlamaSharpTextGeneration.cs b/LLama.KernelMemory/LlamaSharpTextGenerator.cs similarity index 86% rename from LLama.KernelMemory/LlamaSharpTextGeneration.cs rename to LLama.KernelMemory/LlamaSharpTextGenerator.cs index 663a77cf..7269152b 100644 --- a/LLama.KernelMemory/LlamaSharpTextGeneration.cs +++ b/LLama.KernelMemory/LlamaSharpTextGenerator.cs @@ -13,7 +13,7 @@ namespace LLamaSharp.KernelMemory /// /// Provides text generation for LLamaSharp. /// - public class LlamaSharpTextGeneration : ITextGeneration, IDisposable + public class LlamaSharpTextGenerator : ITextGenerator, IDisposable { private readonly LLamaWeights _weights; private readonly StatelessExecutor _executor; @@ -22,11 +22,13 @@ namespace LLamaSharp.KernelMemory private bool _ownsContext = false; private bool _ownsWeights = false; + public int MaxTokenTotal { get; } + /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// The configuration for LLamaSharp. - public LlamaSharpTextGeneration(LLamaSharpConfig config) + public LlamaSharpTextGenerator(LLamaSharpConfig config) { var parameters = new ModelParams(config.ModelPath) { @@ -39,21 +41,23 @@ namespace LLamaSharp.KernelMemory _executor = new StatelessExecutor(_weights, parameters); _defaultInferenceParams = config?.DefaultInferenceParams; _ownsWeights = _ownsContext = true; + MaxTokenTotal = (int)parameters.ContextSize; } /// - /// Initializes a new instance of the class from reused weights, context and executor. + /// Initializes a new instance of the class from reused weights, context and executor. /// If executor is not specified, then a StatelessExecutor will be created with `context.Params`. So far only `StatelessExecutor` is expected. /// /// A LLamaWeights object. /// A LLamaContext object. /// An executor. Currently only StatelessExecutor is expected. - public LlamaSharpTextGeneration(LLamaWeights weights, LLamaContext context, StatelessExecutor? executor = null, InferenceParams? inferenceParams = null) + public LlamaSharpTextGenerator(LLamaWeights weights, LLamaContext context, StatelessExecutor? executor = null, InferenceParams? inferenceParams = null) { _weights = weights; _context = context; _executor = executor ?? 
new StatelessExecutor(_weights, _context.Params); _defaultInferenceParams = inferenceParams; + MaxTokenTotal = (int)_context.Params.ContextSize; } /// @@ -102,5 +106,8 @@ namespace LLamaSharp.KernelMemory }; } } + + /// + public int CountTokens(string text) => _context.Tokenize(text).Length; } } diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs index e04ee9e4..aab3240f 100644 --- a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs +++ b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs @@ -4,7 +4,7 @@ using System.Text.Json.Serialization; namespace LLamaSharp.SemanticKernel.ChatCompletion; -public class ChatRequestSettings : AIRequestSettings +public class ChatRequestSettings : PromptExecutionSettings { /// /// Temperature controls the randomness of the completion. @@ -68,7 +68,7 @@ public class ChatRequestSettings : AIRequestSettings /// Template configuration /// Default max tokens /// An instance of OpenAIRequestSettings - public static ChatRequestSettings FromRequestSettings(AIRequestSettings? requestSettings, int? defaultMaxTokens = null) + public static ChatRequestSettings FromRequestSettings(PromptExecutionSettings? requestSettings, int? defaultMaxTokens = null) { if (requestSettings is null) { diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs index 7e5425bb..9611a0cf 100644 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs +++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs @@ -1,8 +1,13 @@ using LLama; using LLama.Abstractions; +using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.AI; using Microsoft.SemanticKernel.AI.ChatCompletion; +using Microsoft.SemanticKernel.Services; +using System; +using System.IO; using System.Runtime.CompilerServices; +using System.Text; using static LLama.LLamaTransforms; namespace LLamaSharp.SemanticKernel.ChatCompletion; @@ -10,7 +15,7 @@ namespace LLamaSharp.SemanticKernel.ChatCompletion; /// /// LLamaSharp ChatCompletion /// -public sealed class LLamaSharpChatCompletion : IChatCompletion +public sealed class LLamaSharpChatCompletion : IChatCompletionService { private readonly StatelessExecutor _model; private ChatRequestSettings defaultRequestSettings; @@ -21,6 +26,8 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion public IReadOnlyDictionary Attributes => this._attributes; + IReadOnlyDictionary IAIService.Attributes => throw new NotImplementedException(); + static ChatRequestSettings GetDefaultSettings() { return new ChatRequestSettings @@ -45,7 +52,6 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion $"{LLama.Common.AuthorRole.System}:"}); } - /// public ChatHistory CreateNewChat(string? instructions = "") { var history = new ChatHistory(); @@ -59,30 +65,41 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion } /// - public Task> GetChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, CancellationToken cancellationToken = default) + public async Task> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) { - var settings = requestSettings != null - ? 
ChatRequestSettings.FromRequestSettings(requestSettings) - : defaultRequestSettings; - var prompt = historyTransform.HistoryToText(chat.ToLLamaSharpChatHistory()); + var settings = executionSettings != null + ? ChatRequestSettings.FromRequestSettings(executionSettings) + : defaultRequestSettings; + var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory()); var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken); - return Task.FromResult>(new List { new LLamaSharpChatResult(outputTransform.TransformAsync(result)) }.AsReadOnly()); + var output = outputTransform.TransformAsync(result); + + var sb = new StringBuilder(); + await foreach (var token in output) + { + sb.Append(token); + } + + return new List { new(AuthorRole.Assistant, sb.ToString()) }.AsReadOnly(); } /// -#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously. - public async IAsyncEnumerable GetStreamingChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) -#pragma warning restore CS1998 + public async IAsyncEnumerable GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - var settings = requestSettings != null - ? ChatRequestSettings.FromRequestSettings(requestSettings) - : defaultRequestSettings; - var prompt = historyTransform.HistoryToText(chat.ToLLamaSharpChatHistory()); - // This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable. + var settings = executionSettings != null + ? ChatRequestSettings.FromRequestSettings(executionSettings) + : defaultRequestSettings; + var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory()); + var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken); - yield return new LLamaSharpChatResult(outputTransform.TransformAsync(result)); + var output = outputTransform.TransformAsync(result); + + await foreach (var token in output) + { + yield return new StreamingChatMessageContent(AuthorRole.Assistant, token); + } } } diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs deleted file mode 100644 index 1069feda..00000000 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs +++ /dev/null @@ -1,14 +0,0 @@ -using Microsoft.SemanticKernel.AI.ChatCompletion; - -namespace LLamaSharp.SemanticKernel.ChatCompletion; - -/// -/// LLamaSharp Chat Message -/// -public class LLamaSharpChatMessage : ChatMessage -{ - /// - public LLamaSharpChatMessage(AuthorRole role, string content) : base(role, content) - { - } -} diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs deleted file mode 100644 index 07c3ac17..00000000 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs +++ /dev/null @@ -1,44 +0,0 @@ -using Microsoft.SemanticKernel.AI.ChatCompletion; -using Microsoft.SemanticKernel.Orchestration; -using System.Runtime.CompilerServices; -using System.Text; - -namespace LLamaSharp.SemanticKernel.ChatCompletion; - -internal sealed class LLamaSharpChatResult : IChatResult, IChatStreamingResult -{ - private readonly ModelResult _modelResult; - private readonly IAsyncEnumerable _stream; 
- - /// - /// - /// - /// - public LLamaSharpChatResult(IAsyncEnumerable stream) - { - _stream = stream; - this._modelResult = new ModelResult(stream); - } - - public ModelResult ModelResult => this._modelResult; - - /// - public async Task GetChatMessageAsync(CancellationToken cancellationToken = default) - { - var sb = new StringBuilder(); - await foreach (var token in _stream) - { - sb.Append(token); - } - return await Task.FromResult(new LLamaSharpChatMessage(AuthorRole.Assistant, sb.ToString())).ConfigureAwait(false); - } - - /// - public async IAsyncEnumerable GetStreamingChatMessageAsync([EnumeratorCancellation] CancellationToken cancellationToken = default) - { - await foreach (var token in _stream) - { - yield return new LLamaSharpChatMessage(AuthorRole.Assistant, token); - } - } -} diff --git a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj index f787ac50..501ca9d2 100644 --- a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj +++ b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj @@ -30,10 +30,11 @@ AnyCPU;x64;Arm64 LLamaSharp.semantic-kernel Debug;Release;GPU + SKEXP0001,SKEXP0052 - + diff --git a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs index 059a9ff3..e7a6151b 100644 --- a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs +++ b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs @@ -1,12 +1,15 @@ using LLama.Abstractions; using LLamaSharp.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.AI; -using Microsoft.SemanticKernel.AI.TextCompletion; +using Microsoft.SemanticKernel.AI.TextGeneration; +using Microsoft.SemanticKernel.Services; using System.Runtime.CompilerServices; +using System.Text; namespace LLamaSharp.SemanticKernel.TextCompletion; -public sealed class LLamaSharpTextCompletion : ITextCompletion +public sealed class LLamaSharpTextCompletion : ITextGenerationService { public ILLamaExecutor executor; @@ -14,24 +17,34 @@ public sealed class LLamaSharpTextCompletion : ITextCompletion public IReadOnlyDictionary Attributes => this._attributes; + IReadOnlyDictionary IAIService.Attributes => throw new NotImplementedException(); + public LLamaSharpTextCompletion(ILLamaExecutor executor) { this.executor = executor; } - public async Task> GetCompletionsAsync(string text, AIRequestSettings? requestSettings, CancellationToken cancellationToken = default) + /// + public async Task> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) { - var settings = ChatRequestSettings.FromRequestSettings(requestSettings); - var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken); - return await Task.FromResult(new List { new LLamaTextResult(result) }.AsReadOnly()).ConfigureAwait(false); + var settings = ChatRequestSettings.FromRequestSettings(executionSettings); + var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken); + var sb = new StringBuilder(); + await foreach (var token in result) + { + sb.Append(token); + } + return new List { new(sb.ToString()) }; } -#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously. - public async IAsyncEnumerable GetStreamingCompletionsAsync(string text, AIRequestSettings? 
requestSettings,[EnumeratorCancellation] CancellationToken cancellationToken = default) -#pragma warning restore CS1998 + /// + public async IAsyncEnumerable GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - var settings = ChatRequestSettings.FromRequestSettings(requestSettings); - var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken); - yield return new LLamaTextResult(result); + var settings = ChatRequestSettings.FromRequestSettings(executionSettings); + var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken); + await foreach (var token in result) + { + yield return new StreamingTextContent(token); + } } } diff --git a/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs b/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs deleted file mode 100644 index b66013ba..00000000 --- a/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs +++ /dev/null @@ -1,37 +0,0 @@ -using Microsoft.SemanticKernel.AI.TextCompletion; -using Microsoft.SemanticKernel.Orchestration; -using System.Runtime.CompilerServices; -using System.Text; - -namespace LLamaSharp.SemanticKernel.TextCompletion; - -internal sealed class LLamaTextResult : ITextResult, ITextStreamingResult -{ - private readonly IAsyncEnumerable _text; - - public LLamaTextResult(IAsyncEnumerable text) - { - _text = text; - ModelResult = new(text); - } - - public ModelResult ModelResult { get; } - - public async Task GetCompletionAsync(CancellationToken cancellationToken = default) - { - var sb = new StringBuilder(); - await foreach (var token in _text) - { - sb.Append(token); - } - return await Task.FromResult(sb.ToString()).ConfigureAwait(false); - } - - public async IAsyncEnumerable GetCompletionStreamingAsync([EnumeratorCancellation] CancellationToken cancellationToken = default) - { - await foreach (string word in _text) - { - yield return word; - } - } -} diff --git a/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs index 155c5406..83c97f02 100644 --- a/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs +++ b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs @@ -1,4 +1,5 @@ using LLama; +using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.AI.Embeddings; namespace LLamaSharp.SemanticKernel.TextEmbedding; @@ -7,9 +8,9 @@ public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration { private LLamaEmbedder _embedder; - private readonly Dictionary _attributes = new(); + private readonly Dictionary _attributes = new(); - public IReadOnlyDictionary Attributes => this._attributes; + public IReadOnlyDictionary Attributes => this._attributes; public LLamaSharpEmbeddingGeneration(LLamaEmbedder embedder) { @@ -17,7 +18,7 @@ public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration } /// - public async Task>> GenerateEmbeddingsAsync(IList data, CancellationToken cancellationToken = default) + public async Task>> GenerateEmbeddingsAsync(IList data, Kernel? 
kernel = null, CancellationToken cancellationToken = default) { var embeddings = data.Select(text => new ReadOnlyMemory(_embedder.GetEmbeddings(text))).ToList(); return await Task.FromResult(embeddings); diff --git a/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs b/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs index 99881b57..f552114d 100644 --- a/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs +++ b/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs @@ -75,7 +75,7 @@ namespace LLama.Unittest.SemanticKernel public void ChatRequestSettings_FromAIRequestSettings() { // Arrange - var originalRequestSettings = new AIRequestSettings() + var originalRequestSettings = new PromptExecutionSettings() { ServiceId = "test", }; @@ -92,7 +92,7 @@ namespace LLama.Unittest.SemanticKernel public void ChatRequestSettings_FromAIRequestSettingsWithExtraPropertiesInSnakeCase() { // Arrange - var originalRequestSettings = new AIRequestSettings() + var originalRequestSettings = new PromptExecutionSettings() { ServiceId = "test", ExtensionData = new Dictionary @@ -131,7 +131,7 @@ namespace LLama.Unittest.SemanticKernel public void ChatRequestSettings_FromAIRequestSettingsWithExtraPropertiesInPascalCase() { // Arrange - var originalRequestSettings = new AIRequestSettings() + var originalRequestSettings = new PromptExecutionSettings() { ServiceId = "test", ExtensionData = new Dictionary diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs index ee23cd39..208dac1e 100644 --- a/LLama/LLamaEmbedder.cs +++ b/LLama/LLamaEmbedder.cs @@ -117,5 +117,13 @@ namespace LLama { _ctx.Dispose(); } + + /// + /// Tokenize a string. + /// + public int[] Tokenize(string text, bool addBos = true, bool special = false) + { + return _ctx.Tokenize(text, addBos, special); + } } } From 213b4be723e2f1f7baf824764d9dc869f2c3c7fe Mon Sep 17 00:00:00 2001 From: xbotter Date: Thu, 14 Dec 2023 09:47:32 +0800 Subject: [PATCH 2/4] bump sk-1.0.0-rc4 --- LLama.Examples/Examples/SemanticKernelChat.cs | 9 +++++---- LLama.Examples/Examples/SemanticKernelPrompt.cs | 8 ++++---- LLama.Examples/LLama.Examples.csproj | 2 +- .../ChatCompletion/ChatRequestSettings.cs | 2 +- .../ChatCompletion/ChatRequestSettingsConverter.cs | 9 ++++----- .../ChatCompletion/LLamaSharpChatCompletion.cs | 9 +++------ LLama.SemanticKernel/ExtensionMethods.cs | 3 +-- LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj | 2 +- .../TextCompletion/LLamaSharpTextCompletion.cs | 9 +++------ .../TextEmbedding/LLamaSharpEmbeddingGeneration.cs | 4 ++-- .../SemanticKernel/ChatRequestSettingsTests.cs | 10 +++++----- 11 files changed, 30 insertions(+), 37 deletions(-) diff --git a/LLama.Examples/Examples/SemanticKernelChat.cs b/LLama.Examples/Examples/SemanticKernelChat.cs index a9d5be54..86d7a1d5 100644 --- a/LLama.Examples/Examples/SemanticKernelChat.cs +++ b/LLama.Examples/Examples/SemanticKernelChat.cs @@ -2,6 +2,7 @@ using LLama.Common; using Microsoft.SemanticKernel.AI.ChatCompletion; using LLamaSharp.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.ChatCompletion; namespace LLama.Examples.Examples { @@ -29,8 +30,8 @@ namespace LLama.Examples.Examples await MessageOutputAsync(chatHistory); // First bot assistant message - string reply = await chatGPT.GetChatMessageContentAsync(chatHistory); - chatHistory.AddAssistantMessage(reply); + var reply = await chatGPT.GetChatMessageContentAsync(chatHistory); + chatHistory.AddAssistantMessage(reply.Content); await MessageOutputAsync(chatHistory); // Second user message @@ -39,14 
+40,14 @@ namespace LLama.Examples.Examples // Second bot assistant message reply = await chatGPT.GetChatMessageContentAsync(chatHistory); - chatHistory.AddAssistantMessage(reply); + chatHistory.AddAssistantMessage(reply.Content); await MessageOutputAsync(chatHistory); } /// /// Outputs the last message of the chat history /// - private static Task MessageOutputAsync(Microsoft.SemanticKernel.AI.ChatCompletion.ChatHistory chatHistory) + private static Task MessageOutputAsync(Microsoft.SemanticKernel.ChatCompletion.ChatHistory chatHistory) { var message = chatHistory.Last(); diff --git a/LLama.Examples/Examples/SemanticKernelPrompt.cs b/LLama.Examples/Examples/SemanticKernelPrompt.cs index 21cb55de..4c4157a3 100644 --- a/LLama.Examples/Examples/SemanticKernelPrompt.cs +++ b/LLama.Examples/Examples/SemanticKernelPrompt.cs @@ -3,7 +3,7 @@ using LLama.Common; using LLamaSharp.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel; using LLamaSharp.SemanticKernel.TextCompletion; -using Microsoft.SemanticKernel.AI.TextGeneration; +using Microsoft.SemanticKernel.TextGeneration; using Microsoft.Extensions.DependencyInjection; namespace LLama.Examples.Examples @@ -21,7 +21,7 @@ namespace LLama.Examples.Examples using var model = LLamaWeights.LoadFromFile(parameters); var ex = new StatelessExecutor(model, parameters); - var builder = new KernelBuilder(); + var builder = Kernel.CreateBuilder(); builder.Services.AddKeyedSingleton("local-llama", new LLamaSharpTextCompletion(ex)); var kernel = builder.Build(); @@ -43,9 +43,9 @@ One line TLDR with the fewest words."; 2. The acceleration of an object depends on the mass of the object and the amount of force applied. 3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite on the first."; - Console.WriteLine((await kernel.InvokeAsync(summarize,new KernelArguments(text1))).GetValue()); + Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text1 })).GetValue()); - Console.WriteLine((await kernel.InvokeAsync(summarize, new KernelArguments(text2))).GetValue()); + Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text2 })).GetValue()); } } } diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj index 9e4f17ab..2266bdcf 100644 --- a/LLama.Examples/LLama.Examples.csproj +++ b/LLama.Examples/LLama.Examples.csproj @@ -16,7 +16,7 @@ - + diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs index aab3240f..ac22e1fc 100644 --- a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs +++ b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs @@ -1,4 +1,4 @@ -using Microsoft.SemanticKernel.AI; +using Microsoft.SemanticKernel; using System.Text.Json; using System.Text.Json.Serialization; diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs index f0d3a430..e320ea3f 100644 --- a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs +++ b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs @@ -31,6 +31,10 @@ public class ChatRequestSettingsConverter : JsonConverter switch (propertyName) { + case "MODELID": + case "MODEL_ID": + requestSettings.ModelId = reader.GetString(); + break; case "TEMPERATURE": requestSettings.Temperature = reader.GetDouble(); break; @@ -62,10 +66,6 @@ public class ChatRequestSettingsConverter : 
JsonConverter case "TOKEN_SELECTION_BIASES": requestSettings.TokenSelectionBiases = JsonSerializer.Deserialize>(ref reader, options) ?? new Dictionary(); break; - case "SERVICEID": - case "SERVICE_ID": - requestSettings.ServiceId = reader.GetString(); - break; default: reader.Skip(); break; @@ -98,7 +98,6 @@ public class ChatRequestSettingsConverter : JsonConverter writer.WriteNumber("results_per_prompt", value.ResultsPerPrompt); writer.WritePropertyName("token_selection_biases"); JsonSerializer.Serialize(writer, value.TokenSelectionBiases, options); - writer.WriteString("service_id", value.ServiceId); writer.WriteEndObject(); } diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs index 9611a0cf..b1c0d347 100644 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs +++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs @@ -1,8 +1,7 @@ using LLama; using LLama.Abstractions; using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.AI; -using Microsoft.SemanticKernel.AI.ChatCompletion; +using Microsoft.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel.Services; using System; using System.IO; @@ -22,11 +21,9 @@ public sealed class LLamaSharpChatCompletion : IChatCompletionService private readonly IHistoryTransform historyTransform; private readonly ITextStreamTransform outputTransform; - private readonly Dictionary _attributes = new(); + private readonly Dictionary _attributes = new(); - public IReadOnlyDictionary Attributes => this._attributes; - - IReadOnlyDictionary IAIService.Attributes => throw new NotImplementedException(); + public IReadOnlyDictionary Attributes => this._attributes; static ChatRequestSettings GetDefaultSettings() { diff --git a/LLama.SemanticKernel/ExtensionMethods.cs b/LLama.SemanticKernel/ExtensionMethods.cs index 6f39e373..85f9064c 100644 --- a/LLama.SemanticKernel/ExtensionMethods.cs +++ b/LLama.SemanticKernel/ExtensionMethods.cs @@ -1,6 +1,5 @@ using LLamaSharp.SemanticKernel.ChatCompletion; -using Microsoft.SemanticKernel.AI.ChatCompletion; - +using Microsoft.SemanticKernel.ChatCompletion; namespace LLamaSharp.SemanticKernel; public static class ExtensionMethods diff --git a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj index 501ca9d2..8a39de53 100644 --- a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj +++ b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj @@ -34,7 +34,7 @@ - + diff --git a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs index e7a6151b..08ec33e1 100644 --- a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs +++ b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs @@ -1,9 +1,8 @@ using LLama.Abstractions; using LLamaSharp.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.AI; -using Microsoft.SemanticKernel.AI.TextGeneration; using Microsoft.SemanticKernel.Services; +using Microsoft.SemanticKernel.TextGeneration; using System.Runtime.CompilerServices; using System.Text; @@ -13,11 +12,9 @@ public sealed class LLamaSharpTextCompletion : ITextGenerationService { public ILLamaExecutor executor; - private readonly Dictionary _attributes = new(); + private readonly Dictionary _attributes = new(); - public IReadOnlyDictionary Attributes => this._attributes; - - IReadOnlyDictionary 
IAIService.Attributes => throw new NotImplementedException(); + public IReadOnlyDictionary Attributes => this._attributes; public LLamaSharpTextCompletion(ILLamaExecutor executor) { diff --git a/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs index 83c97f02..73ceb0f2 100644 --- a/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs +++ b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs @@ -1,10 +1,10 @@ using LLama; using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.AI.Embeddings; +using Microsoft.SemanticKernel.Embeddings; namespace LLamaSharp.SemanticKernel.TextEmbedding; -public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration +public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGenerationService { private LLamaEmbedder _embedder; diff --git a/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs b/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs index f552114d..ef5d9670 100644 --- a/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs +++ b/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs @@ -1,5 +1,5 @@ using LLamaSharp.SemanticKernel.ChatCompletion; -using Microsoft.SemanticKernel.AI; +using Microsoft.SemanticKernel; namespace LLama.Unittest.SemanticKernel { @@ -77,7 +77,7 @@ namespace LLama.Unittest.SemanticKernel // Arrange var originalRequestSettings = new PromptExecutionSettings() { - ServiceId = "test", + ModelId = "test", }; // Act @@ -85,7 +85,7 @@ namespace LLama.Unittest.SemanticKernel // Assert Assert.NotNull(requestSettings); - Assert.Equal(originalRequestSettings.ServiceId, requestSettings.ServiceId); + Assert.Equal(originalRequestSettings.ModelId, requestSettings.ModelId); } [Fact] @@ -94,7 +94,7 @@ namespace LLama.Unittest.SemanticKernel // Arrange var originalRequestSettings = new PromptExecutionSettings() { - ServiceId = "test", + ModelId = "test", ExtensionData = new Dictionary { { "frequency_penalty", 0.5 }, @@ -133,7 +133,7 @@ namespace LLama.Unittest.SemanticKernel // Arrange var originalRequestSettings = new PromptExecutionSettings() { - ServiceId = "test", + ModelId = "test", ExtensionData = new Dictionary { { "FrequencyPenalty", 0.5 }, From 40ac944fb5b9abc3a3f59ea30d8c724ace8270e3 Mon Sep 17 00:00:00 2001 From: xbotter Date: Tue, 19 Dec 2023 08:42:01 +0800 Subject: [PATCH 3/4] Bump sk to 1.0.1 --- LLama.Examples/Examples/SemanticKernelChat.cs | 2 +- LLama.Examples/Examples/SemanticKernelPrompt.cs | 2 +- LLama.Examples/LLama.Examples.csproj | 3 ++- LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/LLama.Examples/Examples/SemanticKernelChat.cs b/LLama.Examples/Examples/SemanticKernelChat.cs index 86d7a1d5..52324eed 100644 --- a/LLama.Examples/Examples/SemanticKernelChat.cs +++ b/LLama.Examples/Examples/SemanticKernelChat.cs @@ -10,7 +10,7 @@ namespace LLama.Examples.Examples { public static async Task Run() { - Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md"); + Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs"); Console.Write("Please input your model path: "); var modelPath = Console.ReadLine(); diff --git a/LLama.Examples/Examples/SemanticKernelPrompt.cs b/LLama.Examples/Examples/SemanticKernelPrompt.cs index 4c4157a3..40838e8b 100644 --- 
a/LLama.Examples/Examples/SemanticKernelPrompt.cs +++ b/LLama.Examples/Examples/SemanticKernelPrompt.cs @@ -12,7 +12,7 @@ namespace LLama.Examples.Examples { public static async Task Run() { - Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs"); + Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md"); Console.Write("Please input your model path: "); var modelPath = Console.ReadLine(); diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj index 94704e01..b0d7740e 100644 --- a/LLama.Examples/LLama.Examples.csproj +++ b/LLama.Examples/LLama.Examples.csproj @@ -16,7 +16,8 @@ - + + diff --git a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj index 8a39de53..2f365924 100644 --- a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj +++ b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj @@ -34,7 +34,7 @@ - + From 211ce12bf57e315b6e8987e3736c6b0e722a15b5 Mon Sep 17 00:00:00 2001 From: xbotter Date: Thu, 21 Dec 2023 10:28:37 +0800 Subject: [PATCH 4/4] LLamaEmbedder exposes the Context --- .../LLamaSharpTextEmbeddingGenerator.cs | 2 +- LLama/LLamaEmbedder.cs | 32 ++----------------- 2 files changed, 3 insertions(+), 31 deletions(-) diff --git a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs index a00c5352..8148adc8 100644 --- a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs +++ b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs @@ -101,6 +101,6 @@ namespace LLamaSharp.KernelMemory } /// - public int CountTokens(string text) => _embedder.Tokenize(text).Length; + public int CountTokens(string text) => _embedder.Context.Tokenize(text).Length; } } diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs index 208dac1e..ab56280c 100644 --- a/LLama/LLamaEmbedder.cs +++ b/LLama/LLamaEmbedder.cs @@ -20,30 +20,9 @@ namespace LLama public int EmbeddingSize => _ctx.EmbeddingSize; /// - /// Create a new embedder (loading temporary weights) + /// LLama Context /// - /// - /// - [Obsolete("Preload LLamaWeights and use the constructor which accepts them")] - public LLamaEmbedder(ILLamaParams allParams, ILogger? logger = null) - : this(allParams, allParams, logger) - { - } - - /// - /// Create a new embedder (loading temporary weights) - /// - /// - /// - /// - [Obsolete("Preload LLamaWeights and use the constructor which accepts them")] - public LLamaEmbedder(IModelParams modelParams, IContextParams contextParams, ILogger? logger = null) - { - using var weights = LLamaWeights.LoadFromFile(modelParams); - - contextParams.EmbeddingMode = true; - _ctx = weights.CreateContext(contextParams, logger); - } + public LLamaContext Context => this._ctx; /// /// Create a new embedder, using the given LLamaWeights @@ -118,12 +97,5 @@ namespace LLama _ctx.Dispose(); } - /// - /// Tokenize a string. - /// - public int[] Tokenize(string text, bool addBos = true, bool special = false) - { - return _ctx.Tokenize(text, addBos, special); - } } }
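
---
Reviewer note (not part of the patches): below is a minimal end-to-end sketch of how the post-patch API surface is consumed, mirroring the updated SemanticKernelPrompt.cs example above. It assumes Semantic Kernel 1.0.1 as pinned by patch 3; the model path, service key, and prompt text are placeholders, not values taken from the patches.

    // A minimal consumer sketch, assuming the post-patch packages
    // (Microsoft.SemanticKernel 1.0.1, LLamaSharp.SemanticKernel as updated above).
    using LLama;
    using LLama.Common;
    using LLamaSharp.SemanticKernel.TextCompletion;
    using Microsoft.Extensions.DependencyInjection;
    using Microsoft.SemanticKernel;
    using Microsoft.SemanticKernel.TextGeneration;

    // Load weights once and reuse them through a stateless executor.
    var parameters = new ModelParams("path/to/model.gguf"); // placeholder path
    using var model = LLamaWeights.LoadFromFile(parameters);
    var executor = new StatelessExecutor(model, parameters);

    // SK 1.0 drops builder.WithAIService<T>(...) in favor of keyed DI
    // registrations on Kernel.CreateBuilder(), as patches 1 and 2 show.
    var builder = Kernel.CreateBuilder();
    builder.Services.AddKeyedSingleton<ITextGenerationService>(
        "local-llama", new LLamaSharpTextCompletion(executor));
    var kernel = builder.Build();

    // CreateSemanticFunction/RunAsync are replaced by
    // CreateFunctionFromPrompt/InvokeAsync with KernelArguments.
    var summarize = kernel.CreateFunctionFromPrompt(
        "{{$input}}\n\nOne line TLDR with the fewest words.");
    var result = await kernel.InvokeAsync(
        summarize, new() { ["input"] = "Energy cannot be created or destroyed." });
    Console.WriteLine(result.GetValue<string>());

The same keyed-singleton pattern applies to LLamaSharpChatCompletion (registered as IChatCompletionService), whose GetChatMessageContentAsync now returns a ChatMessageContent whose .Content string is appended to the history, as patch 2 does in SemanticKernelChat.cs.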