
Merge pull request #356 from xbotter/deps/sk-rc3

bump sk to 1.0.1 & km to 0.18
tags/0.9.1
Martin Evans, 2 years ago, commit f0d7468b22
20 changed files with 163 additions and 252 deletions

 1. LLama.Examples/Examples/KernelMemory.cs (+6 -0)
 2. LLama.Examples/Examples/SemanticKernelChat.cs (+7 -6)
 3. LLama.Examples/Examples/SemanticKernelPrompt.cs (+9 -9)
 4. LLama.Examples/LLama.Examples.csproj (+6 -19)
 5. LLama.KernelMemory/BuilderExtensions.cs (+13 -23)
 6. LLama.KernelMemory/LLamaSharp.KernelMemory.csproj (+1 -3)
 7. LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs (+22 -9)
 8. LLama.KernelMemory/LlamaSharpTextGenerator.cs (+12 -5)
 9. LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs (+3 -3)
10. LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs (+4 -5)
11. LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs (+35 -21)
12. LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs (+0 -14)
13. LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs (+0 -44)
14. LLama.SemanticKernel/ExtensionMethods.cs (+1 -2)
15. LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj (+2 -1)
16. LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs (+25 -15)
17. LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs (+0 -37)
18. LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs (+6 -5)
19. LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs (+8 -8)
20. LLama/LLamaEmbedder.cs (+3 -23)

LLama.Examples/Examples/KernelMemory.cs (+6 -0)

@@ -16,6 +16,11 @@ namespace LLama.Examples.Examples
 Console.WriteLine("Example from: https://github.com/microsoft/kernel-memory/blob/main/examples/101-using-core-nuget/Program.cs");
 Console.Write("Please input your model path: ");
 var modelPath = Console.ReadLine();
+var searchClientConfig = new SearchClientConfig
+{
+    MaxMatchesCount = 1,
+    AnswerTokens = 100,
+};
 var memory = new KernelMemoryBuilder()
     .WithLLamaSharpDefaults(new LLamaSharpConfig(modelPath)
     {
@@ -24,6 +29,7 @@ namespace LLama.Examples.Examples
         AntiPrompts = new List<string> { "\n\n" }
     }
     })
+    .WithSearchClientConfig(searchClientConfig)
     .With(new TextPartitioningOptions
     {
         MaxTokensPerParagraph = 300,

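For orientation, the resulting setup reads roughly as follows. This is a sketch rather than part of the diff: the model path is a placeholder, and it assumes the SearchClientConfig type from Kernel Memory 0.18 as used above.

    // Sketch only: "model.gguf" is a placeholder path.
    var searchClientConfig = new SearchClientConfig
    {
        MaxMatchesCount = 1, // return only the single best match
        AnswerTokens = 100,  // cap the generated answer length
    };
    var builder = new KernelMemoryBuilder()
        .WithLLamaSharpDefaults(new LLamaSharpConfig("model.gguf"))
        .WithSearchClientConfig(searchClientConfig);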

LLama.Examples/Examples/SemanticKernelChat.cs (+7 -6)

@@ -2,6 +2,7 @@
 using LLama.Common;
 using Microsoft.SemanticKernel.AI.ChatCompletion;
 using LLamaSharp.SemanticKernel.ChatCompletion;
+using Microsoft.SemanticKernel.ChatCompletion;
 
 namespace LLama.Examples.Examples
 {
@@ -9,7 +10,7 @@ namespace LLama.Examples.Examples
 {
 public static async Task Run()
 {
-Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
+Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs");
 Console.Write("Please input your model path: ");
 var modelPath = Console.ReadLine();
 
@@ -29,8 +30,8 @@ namespace LLama.Examples.Examples
 await MessageOutputAsync(chatHistory);
 
 // First bot assistant message
-string reply = await chatGPT.GenerateMessageAsync(chatHistory);
-chatHistory.AddAssistantMessage(reply);
+var reply = await chatGPT.GetChatMessageContentAsync(chatHistory);
+chatHistory.AddAssistantMessage(reply.Content);
 await MessageOutputAsync(chatHistory);
 
 // Second user message
@@ -38,15 +39,15 @@ namespace LLama.Examples.Examples
 await MessageOutputAsync(chatHistory);
 
 // Second bot assistant message
-reply = await chatGPT.GenerateMessageAsync(chatHistory);
-chatHistory.AddAssistantMessage(reply);
+reply = await chatGPT.GetChatMessageContentAsync(chatHistory);
+chatHistory.AddAssistantMessage(reply.Content);
 await MessageOutputAsync(chatHistory);
 }
 
 /// <summary>
 /// Outputs the last message of the chat history
 /// </summary>
-private static Task MessageOutputAsync(Microsoft.SemanticKernel.AI.ChatCompletion.ChatHistory chatHistory)
+private static Task MessageOutputAsync(Microsoft.SemanticKernel.ChatCompletion.ChatHistory chatHistory)
 {
 var message = chatHistory.Last();
 

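This change reflects SK 1.0's replacement of IChatCompletion.GenerateMessageAsync with IChatCompletionService.GetChatMessageContentAsync, which returns a ChatMessageContent rather than a raw string. A minimal sketch of the new call shape, assuming chatGPT is an LLamaSharpChatCompletion built elsewhere:

    // Sketch: reply is a ChatMessageContent; its Content property holds the text.
    var chatHistory = chatGPT.CreateNewChat("You are a helpful assistant.");
    chatHistory.AddUserMessage("Hi, what is 1 + 1?");
    var reply = await chatGPT.GetChatMessageContentAsync(chatHistory);
    chatHistory.AddAssistantMessage(reply.Content);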

LLama.Examples/Examples/SemanticKernelPrompt.cs (+9 -9)

@@ -2,8 +2,9 @@
 using LLama.Common;
 using LLamaSharp.SemanticKernel.ChatCompletion;
 using Microsoft.SemanticKernel;
-using Microsoft.SemanticKernel.AI.TextCompletion;
 using LLamaSharp.SemanticKernel.TextCompletion;
+using Microsoft.SemanticKernel.TextGeneration;
+using Microsoft.Extensions.DependencyInjection;
 
 namespace LLama.Examples.Examples
 {
@@ -11,7 +12,7 @@ namespace LLama.Examples.Examples
 {
 public static async Task Run()
 {
-Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs");
+Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
 Console.Write("Please input your model path: ");
 var modelPath = Console.ReadLine();
 
@@ -20,8 +21,8 @@ namespace LLama.Examples.Examples
 using var model = LLamaWeights.LoadFromFile(parameters);
 var ex = new StatelessExecutor(model, parameters);
 
-var builder = new KernelBuilder();
-builder.WithAIService<ITextCompletion>("local-llama", new LLamaSharpTextCompletion(ex), true);
+var builder = Kernel.CreateBuilder();
+builder.Services.AddKeyedSingleton<ITextGenerationService>("local-llama", new LLamaSharpTextCompletion(ex));
 
 var kernel = builder.Build();
 
@@ -29,8 +30,8 @@ namespace LLama.Examples.Examples
 
 One line TLDR with the fewest words.";
 
-ChatRequestSettings settings = new() {MaxTokens = 100};
-var summarize = kernel.CreateSemanticFunction(prompt, requestSettings: settings);
+ChatRequestSettings settings = new() { MaxTokens = 100 };
+var summarize = kernel.CreateFunctionFromPrompt(prompt, settings);
 
 string text1 = @"
 1st Law of Thermodynamics - Energy cannot be created or destroyed.
@@ -42,10 +43,9 @@ One line TLDR with the fewest words.";
 2. The acceleration of an object depends on the mass of the object and the amount of force applied.
 3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite on the first.";
 
-Console.WriteLine((await kernel.RunAsync(text1, summarize)).GetValue<string>());
+Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text1 })).GetValue<string>());
 
-Console.WriteLine((await kernel.RunAsync(text2, summarize)).GetValue<string>());
+Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text2 })).GetValue<string>());
 }
 }
 }

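The key migration here is from KernelBuilder.WithAIService to keyed dependency injection, and from CreateSemanticFunction/RunAsync to CreateFunctionFromPrompt/InvokeAsync with named arguments. A condensed sketch, assuming ex is a configured StatelessExecutor as above:

    // Sketch of the SK 1.0 registration and invocation pattern.
    var builder = Kernel.CreateBuilder();
    builder.Services.AddKeyedSingleton<ITextGenerationService>("local-llama", new LLamaSharpTextCompletion(ex));
    var kernel = builder.Build();

    var summarize = kernel.CreateFunctionFromPrompt("{{$input}}\n\nOne line TLDR with the fewest words.",
        new ChatRequestSettings { MaxTokens = 100 });
    var result = await kernel.InvokeAsync(summarize, new() { ["input"] = "some long text" });
    Console.WriteLine(result.GetValue<string>());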
LLama.Examples/LLama.Examples.csproj (+6 -19)

@@ -1,4 +1,4 @@
-<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
 <Import Project="..\LLama\LLamaSharp.Runtime.targets" />
 <PropertyGroup>
 <OutputType>Exe</OutputType>
@@ -9,28 +9,15 @@
 <!-- Set IncludeBuiltInRuntimes to false to include your own runtime libraries and not link the defaults -->
 <IncludeBuiltInRuntimes>true</IncludeBuiltInRuntimes>
 <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-</PropertyGroup>
-
-<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
-<NoWarn>1701;1702;8604</NoWarn>
-</PropertyGroup>
-
-<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
-<NoWarn>1701;1702;8604</NoWarn>
-</PropertyGroup>
-
-<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-<NoWarn>1701;1702;8604</NoWarn>
-</PropertyGroup>
-
-<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-<NoWarn>1701;1702;8604</NoWarn>
+<LangVersion>12</LangVersion>
+<NoWarn>1701;1702;8604;SKEXP0001;SKEXP0052;SKEXP0003</NoWarn>
 </PropertyGroup>
 
 <ItemGroup>
 <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="8.0.0" />
-<PackageReference Include="Microsoft.KernelMemory.Core" Version="0.12.231123.1-preview" />
-<PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta8" />
+<PackageReference Include="Microsoft.KernelMemory.Core" Version="0.18.231209.1-preview" />
+<PackageReference Include="Microsoft.SemanticKernel" Version="1.0.1" />
+<PackageReference Include="Microsoft.SemanticKernel.Plugins.Memory" Version="1.0.1-alpha" />
 <PackageReference Include="Spectre.Console" Version="0.48.0" />
 </ItemGroup>



LLama.KernelMemory/BuilderExtensions.cs (+13 -23)

@@ -17,19 +17,6 @@ namespace LLamaSharp.KernelMemory
 public static class BuilderExtensions
 {
 
-private static IKernelMemoryBuilder WithCustomEmbeddingGeneration(this IKernelMemoryBuilder builder, ITextEmbeddingGeneration embeddingGeneration)
-{
-builder.AddSingleton<ITextEmbeddingGeneration>(embeddingGeneration);
-builder.AddIngestionEmbeddingGenerator(embeddingGeneration);
-return builder;
-}
-
-private static IKernelMemoryBuilder WithCustomTextGeneration(this IKernelMemoryBuilder builder, ITextGeneration textGeneration)
-{
-builder.AddSingleton<ITextGeneration>(textGeneration);
-return builder;
-}
-
 /// <summary>
 /// Adds LLamaSharpTextEmbeddingGeneration to the KernelMemoryBuilder.
 /// </summary>
@@ -38,7 +25,9 @@ namespace LLamaSharp.KernelMemory
 /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration added.</returns>
 public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpConfig config)
 {
-builder.WithCustomEmbeddingGeneration(new LLamaSharpTextEmbeddingGeneration(config));
+var generator = new LLamaSharpTextEmbeddingGenerator(config);
+builder.AddSingleton<ITextEmbeddingGenerator>(generator);
+builder.AddIngestionEmbeddingGenerator(generator);
 return builder;
 }
 
@@ -46,11 +35,12 @@ namespace LLamaSharp.KernelMemory
 /// Adds LLamaSharpTextEmbeddingGeneration to the KernelMemoryBuilder.
 /// </summary>
 /// <param name="builder">The KernelMemoryBuilder instance.</param>
-/// <param name="textEmbeddingGeneration">The LLamaSharpTextEmbeddingGeneration instance.</param>
+/// <param name="textEmbeddingGenerator">The LLamaSharpTextEmbeddingGeneration instance.</param>
 /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration added.</returns>
-public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpTextEmbeddingGeneration textEmbeddingGeneration)
+public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpTextEmbeddingGenerator textEmbeddingGenerator)
 {
-builder.WithCustomEmbeddingGeneration(textEmbeddingGeneration);
+builder.AddSingleton<ITextEmbeddingGenerator>(textEmbeddingGenerator);
+builder.AddIngestionEmbeddingGenerator(textEmbeddingGenerator);
 return builder;
 }
 
@@ -62,7 +52,7 @@ namespace LLamaSharp.KernelMemory
 /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextGeneration added.</returns>
 public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LLamaSharpConfig config)
 {
-builder.WithCustomTextGeneration(new LlamaSharpTextGeneration(config));
+builder.AddSingleton<ITextGenerator>(new LlamaSharpTextGenerator(config));
 return builder;
 }
 
@@ -70,11 +60,11 @@ namespace LLamaSharp.KernelMemory
 /// Adds LLamaSharpTextGeneration to the KernelMemoryBuilder.
 /// </summary>
 /// <param name="builder">The KernelMemoryBuilder instance.</param>
-/// <param name="textGeneration">The LlamaSharpTextGeneration instance.</param>
+/// <param name="textGenerator">The LlamaSharpTextGeneration instance.</param>
 /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextGeneration added.</returns>
-public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LlamaSharpTextGeneration textGeneration)
+public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LlamaSharpTextGenerator textGenerator)
 {
-builder.WithCustomTextGeneration(textGeneration);
+builder.AddSingleton<ITextGenerator>(textGenerator);
 return builder;
 }
 
@@ -96,8 +86,8 @@ namespace LLamaSharp.KernelMemory
 var context = weights.CreateContext(parameters);
 var executor = new StatelessExecutor(weights, parameters);
 var embedder = new LLamaEmbedder(weights, parameters);
-builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGeneration(embedder));
-builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGeneration(weights, context, executor, config?.DefaultInferenceParams));
+builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGenerator(embedder));
+builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGenerator(weights, context, executor, config?.DefaultInferenceParams));
 return builder;
 }
 }

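With the private WithCustomEmbeddingGeneration/WithCustomTextGeneration helpers removed, each public extension now registers its generator directly via AddSingleton (plus AddIngestionEmbeddingGenerator for embeddings). Typical usage is unchanged; a sketch with a placeholder model path:

    // Sketch: both generators wired from a single config.
    var config = new LLamaSharpConfig("model.gguf"); // placeholder path
    var builder = new KernelMemoryBuilder()
        .WithLLamaSharpTextEmbeddingGeneration(config)
        .WithLLamaSharpTextGeneration(config);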

LLama.KernelMemory/LLamaSharp.KernelMemory.csproj (+1 -3)

@@ -4,8 +4,6 @@
 <TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks>
 <ImplicitUsings>enable</ImplicitUsings>
 <Nullable>enable</Nullable>
-
-<Version>0.7.1</Version>
+<Version>0.8.0</Version>
 <Authors>Xbotter</Authors>
 <Company>SciSharp STACK</Company>
@@ -29,7 +27,7 @@
 </PropertyGroup>
 
 <ItemGroup>
-<PackageReference Include="Microsoft.KernelMemory.Abstractions" Version="0.12.231123.1-preview" />
+<PackageReference Include="Microsoft.KernelMemory.Abstractions" Version="0.18.231209.1-preview" />
 </ItemGroup>
 
 <ItemGroup>


LLama.KernelMemory/LLamaSharpTextEmbeddingGeneration.cs → LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs (+22 -9)

@@ -1,6 +1,8 @@
 using LLama;
 using LLama.Abstractions;
 using LLama.Common;
+using Microsoft.KernelMemory;
+using Microsoft.KernelMemory.AI;
 using Microsoft.SemanticKernel.AI.Embeddings;
 using System;
 using System.Collections.Generic;
@@ -13,22 +15,23 @@ namespace LLamaSharp.KernelMemory
 /// <summary>
 /// Provides text embedding generation for LLamaSharp.
 /// </summary>
-public class LLamaSharpTextEmbeddingGeneration : ITextEmbeddingGeneration, IDisposable
+public class LLamaSharpTextEmbeddingGenerator
+    : ITextEmbeddingGenerator, IDisposable
 {
 private readonly LLamaSharpConfig? _config;
 private readonly LLamaWeights? _weights;
 private readonly LLamaEmbedder _embedder;
 private bool _ownsEmbedder = false;
 private bool _ownsWeights = false;
-private readonly Dictionary<string, string> _attributes = new();
 
-public IReadOnlyDictionary<string, string> Attributes => this._attributes;
+/// <inheritdoc/>
+public int MaxTokens => (int?)_config?.ContextSize ?? 2048;
 
 /// <summary>
-/// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGeneration"/> class.
+/// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGenerator"/> class.
 /// </summary>
 /// <param name="config">The configuration for LLamaSharp.</param>
-public LLamaSharpTextEmbeddingGeneration(LLamaSharpConfig config)
+public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
 {
 this._config = config;
 var @params = new ModelParams(_config.ModelPath);
@@ -39,11 +42,11 @@ namespace LLamaSharp.KernelMemory
 }
 
 /// <summary>
-/// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGeneration"/> class from reused weights.
+/// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGenerator"/> class from reused weights.
 /// </summary>
 /// <param name="config">The configuration for LLamaSharp.</param>
 /// <param name="weights">A LLamaWeights object.</param>
-public LLamaSharpTextEmbeddingGeneration(LLamaSharpConfig config, LLamaWeights weights)
+public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights weights)
 {
 this._config = config;
 var @params = new ModelParams(_config.ModelPath);
@@ -53,10 +56,10 @@ namespace LLamaSharp.KernelMemory
 }
 
 /// <summary>
-/// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGeneration"/> class from reused embedder.
+/// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGenerator"/> class from reused embedder.
 /// </summary>
 /// <param name="embedder">A LLamaEmbedder object.</param>
-public LLamaSharpTextEmbeddingGeneration(LLamaEmbedder embedder)
+public LLamaSharpTextEmbeddingGenerator(LLamaEmbedder embedder)
 {
 this._config = null;
 this._weights = null;
@@ -89,5 +92,15 @@ namespace LLamaSharp.KernelMemory
 
 return Task.FromResult(results);
 }
+
+/// <inheritdoc/>
+public Task<Embedding> GenerateEmbeddingAsync(string text, CancellationToken cancellationToken = default)
+{
+var embeddings = _embedder.GetEmbeddings(text);
+return Task.FromResult(new Embedding(embeddings));
+}
+
+/// <inheritdoc/>
+public int CountTokens(string text) => _embedder.Context.Tokenize(text).Length;
 }
 }

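Kernel Memory 0.18's ITextEmbeddingGenerator adds a single-text GenerateEmbeddingAsync (returning KM's Embedding type) plus MaxTokens and CountTokens, which the class now satisfies as shown above. A usage sketch with a placeholder model path:

    // Sketch: single-text embedding plus token accounting.
    using var generator = new LLamaSharpTextEmbeddingGenerator(new LLamaSharpConfig("model.gguf"));
    var embedding = await generator.GenerateEmbeddingAsync("hello world");
    var tokens = generator.CountTokens("hello world"); // tokenized via the embedder's context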
LLama.KernelMemory/LlamaSharpTextGeneration.cs → LLama.KernelMemory/LlamaSharpTextGenerator.cs (+12 -5)

@@ -13,7 +13,7 @@ namespace LLamaSharp.KernelMemory
 /// <summary>
 /// Provides text generation for LLamaSharp.
 /// </summary>
-public class LlamaSharpTextGeneration : ITextGeneration, IDisposable
+public class LlamaSharpTextGenerator : ITextGenerator, IDisposable
 {
 private readonly LLamaWeights _weights;
 private readonly StatelessExecutor _executor;
@@ -22,11 +22,13 @@ namespace LLamaSharp.KernelMemory
 private bool _ownsContext = false;
 private bool _ownsWeights = false;
 
+public int MaxTokenTotal { get; }
+
 /// <summary>
-/// Initializes a new instance of the <see cref="LlamaSharpTextGeneration"/> class.
+/// Initializes a new instance of the <see cref="LlamaSharpTextGenerator"/> class.
 /// </summary>
 /// <param name="config">The configuration for LLamaSharp.</param>
-public LlamaSharpTextGeneration(LLamaSharpConfig config)
+public LlamaSharpTextGenerator(LLamaSharpConfig config)
 {
 var parameters = new ModelParams(config.ModelPath)
 {
@@ -39,21 +41,23 @@ namespace LLamaSharp.KernelMemory
 _executor = new StatelessExecutor(_weights, parameters);
 _defaultInferenceParams = config?.DefaultInferenceParams;
 _ownsWeights = _ownsContext = true;
+MaxTokenTotal = (int)parameters.ContextSize;
 }
 
 /// <summary>
-/// Initializes a new instance of the <see cref="LlamaSharpTextGeneration"/> class from reused weights, context and executor.
+/// Initializes a new instance of the <see cref="LlamaSharpTextGenerator"/> class from reused weights, context and executor.
 /// If executor is not specified, then a StatelessExecutor will be created with `context.Params`. So far only `StatelessExecutor` is expected.
 /// </summary>
 /// <param name="weights">A LLamaWeights object.</param>
 /// <param name="context">A LLamaContext object.</param>
 /// <param name="executor">An executor. Currently only StatelessExecutor is expected.</param>
-public LlamaSharpTextGeneration(LLamaWeights weights, LLamaContext context, StatelessExecutor? executor = null, InferenceParams? inferenceParams = null)
+public LlamaSharpTextGenerator(LLamaWeights weights, LLamaContext context, StatelessExecutor? executor = null, InferenceParams? inferenceParams = null)
 {
 _weights = weights;
 _context = context;
 _executor = executor ?? new StatelessExecutor(_weights, _context.Params);
 _defaultInferenceParams = inferenceParams;
+MaxTokenTotal = (int)_context.Params.ContextSize;
 }
 
 /// <inheritdoc/>
@@ -102,5 +106,8 @@ namespace LLamaSharp.KernelMemory
 };
 }
 }
+
+/// <inheritdoc/>
+public int CountTokens(string text) => _context.Tokenize(text).Length;
 }
 }

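Similarly, ITextGenerator requires MaxTokenTotal and CountTokens, which the generator now derives from its context size and tokenizer. That lets callers budget prompts before inference, roughly:

    // Sketch: checking a prompt against the context budget ("model.gguf" is a placeholder).
    using var generator = new LlamaSharpTextGenerator(new LLamaSharpConfig("model.gguf"));
    var prompt = "Describe the three laws of motion.";
    int used = generator.CountTokens(prompt);
    int remaining = generator.MaxTokenTotal - used; // MaxTokenTotal mirrors ContextSize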
LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs (+3 -3)

@@ -1,10 +1,10 @@
-using Microsoft.SemanticKernel.AI;
+using Microsoft.SemanticKernel;
 using System.Text.Json;
 using System.Text.Json.Serialization;
 
 namespace LLamaSharp.SemanticKernel.ChatCompletion;
 
-public class ChatRequestSettings : AIRequestSettings
+public class ChatRequestSettings : PromptExecutionSettings
 {
 /// <summary>
 /// Temperature controls the randomness of the completion.
@@ -68,7 +68,7 @@ public class ChatRequestSettings : AIRequestSettings
 /// <param name="requestSettings">Template configuration</param>
 /// <param name="defaultMaxTokens">Default max tokens</param>
 /// <returns>An instance of OpenAIRequestSettings</returns>
-public static ChatRequestSettings FromRequestSettings(AIRequestSettings? requestSettings, int? defaultMaxTokens = null)
+public static ChatRequestSettings FromRequestSettings(PromptExecutionSettings? requestSettings, int? defaultMaxTokens = null)
 {
 if (requestSettings is null)
 {

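FromRequestSettings still accepts any generic settings object, now typed as SK 1.0's PromptExecutionSettings base class; for example:

    // Sketch: adapting generic SK settings into LLamaSharp-specific ones.
    PromptExecutionSettings generic = new() { ModelId = "test" };
    var settings = ChatRequestSettings.FromRequestSettings(generic, defaultMaxTokens: 256);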

LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs (+4 -5)

@@ -31,6 +31,10 @@ public class ChatRequestSettingsConverter : JsonConverter<ChatRequestSettings>
 
 switch (propertyName)
 {
+case "MODELID":
+case "MODEL_ID":
+requestSettings.ModelId = reader.GetString();
+break;
 case "TEMPERATURE":
 requestSettings.Temperature = reader.GetDouble();
 break;
@@ -62,10 +66,6 @@ public class ChatRequestSettingsConverter : JsonConverter<ChatRequestSettings>
 case "TOKEN_SELECTION_BIASES":
 requestSettings.TokenSelectionBiases = JsonSerializer.Deserialize<IDictionary<int, int>>(ref reader, options) ?? new Dictionary<int, int>();
 break;
-case "SERVICEID":
-case "SERVICE_ID":
-requestSettings.ServiceId = reader.GetString();
-break;
 default:
 reader.Skip();
 break;
@@ -98,7 +98,6 @@ public class ChatRequestSettingsConverter : JsonConverter<ChatRequestSettings>
 writer.WriteNumber("results_per_prompt", value.ResultsPerPrompt);
 writer.WritePropertyName("token_selection_biases");
 JsonSerializer.Serialize(writer, value.TokenSelectionBiases, options);
-writer.WriteString("service_id", value.ServiceId);
 
 writer.WriteEndObject();
 }

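In JSON terms, the converter now accepts model_id (in either casing) and no longer reads or writes service_id. A deserialization sketch under those assumptions:

    // Sketch: deserializing with the updated converter registered.
    var options = new JsonSerializerOptions();
    options.Converters.Add(new ChatRequestSettingsConverter());
    var settings = JsonSerializer.Deserialize<ChatRequestSettings>(
        "{ \"model_id\": \"local-llama\", \"temperature\": 0.7 }", options);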
LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs (+35 -21)

@@ -1,8 +1,12 @@
 using LLama;
 using LLama.Abstractions;
-using Microsoft.SemanticKernel.AI;
-using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.ChatCompletion;
+using Microsoft.SemanticKernel.Services;
+using System;
+using System.IO;
 using System.Runtime.CompilerServices;
+using System.Text;
 using static LLama.LLamaTransforms;
 
 namespace LLamaSharp.SemanticKernel.ChatCompletion;
@@ -10,16 +14,16 @@ namespace LLamaSharp.SemanticKernel.ChatCompletion;
 /// <summary>
 /// LLamaSharp ChatCompletion
 /// </summary>
-public sealed class LLamaSharpChatCompletion : IChatCompletion
+public sealed class LLamaSharpChatCompletion : IChatCompletionService
 {
 private readonly StatelessExecutor _model;
 private ChatRequestSettings defaultRequestSettings;
 private readonly IHistoryTransform historyTransform;
 private readonly ITextStreamTransform outputTransform;
 
-private readonly Dictionary<string, string> _attributes = new();
+private readonly Dictionary<string, object?> _attributes = new();
 
-public IReadOnlyDictionary<string, string> Attributes => this._attributes;
+public IReadOnlyDictionary<string, object?> Attributes => this._attributes;
 
 static ChatRequestSettings GetDefaultSettings()
 {
@@ -45,7 +49,6 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion
 $"{LLama.Common.AuthorRole.System}:"});
 }
 
-/// <inheritdoc/>
 public ChatHistory CreateNewChat(string? instructions = "")
 {
 var history = new ChatHistory();
@@ -59,30 +62,41 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion
 }
 
 /// <inheritdoc/>
-public Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
+public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
 {
-var settings = requestSettings != null
-? ChatRequestSettings.FromRequestSettings(requestSettings)
-: defaultRequestSettings;
-var prompt = historyTransform.HistoryToText(chat.ToLLamaSharpChatHistory());
+var settings = executionSettings != null
+? ChatRequestSettings.FromRequestSettings(executionSettings)
+: defaultRequestSettings;
+var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory());
 
 var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken);
 
-return Task.FromResult<IReadOnlyList<IChatResult>>(new List<IChatResult> { new LLamaSharpChatResult(outputTransform.TransformAsync(result)) }.AsReadOnly());
+var output = outputTransform.TransformAsync(result);
+
+var sb = new StringBuilder();
+await foreach (var token in output)
+{
+sb.Append(token);
+}
+
+return new List<ChatMessageContent> { new(AuthorRole.Assistant, sb.ToString()) }.AsReadOnly();
 }
 
 /// <inheritdoc/>
-#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously.
-public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
-#pragma warning restore CS1998
+public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
 {
-var settings = requestSettings != null
-? ChatRequestSettings.FromRequestSettings(requestSettings)
-: defaultRequestSettings;
-var prompt = historyTransform.HistoryToText(chat.ToLLamaSharpChatHistory());
-// This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable.
+var settings = executionSettings != null
+? ChatRequestSettings.FromRequestSettings(executionSettings)
+: defaultRequestSettings;
+var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory());
 var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken);
 
-yield return new LLamaSharpChatResult(outputTransform.TransformAsync(result));
+var output = outputTransform.TransformAsync(result);
+
+await foreach (var token in output)
+{
+yield return new StreamingChatMessageContent(AuthorRole.Assistant, token);
+}
 }
 }

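Because the IChatResult/IChatStreamingResult wrappers are deleted below, both methods now consume the transformed token stream directly: the buffered method concatenates tokens into a single ChatMessageContent, while the streaming method yields one StreamingChatMessageContent per token. Consuming the streaming path looks roughly like this ("chat" and "history" are assumed set up elsewhere):

    // Sketch: "chat" is an LLamaSharpChatCompletion, "history" a populated ChatHistory.
    await foreach (var chunk in chat.GetStreamingChatMessageContentsAsync(history))
    {
        Console.Write(chunk.Content); // one transformed token per chunk
    }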
LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs (+0 -14)

@@ -1,14 +0,0 @@
-using Microsoft.SemanticKernel.AI.ChatCompletion;
-
-namespace LLamaSharp.SemanticKernel.ChatCompletion;
-
-/// <summary>
-/// LLamaSharp Chat Message
-/// </summary>
-public class LLamaSharpChatMessage : ChatMessage
-{
-/// <inheritdoc/>
-public LLamaSharpChatMessage(AuthorRole role, string content) : base(role, content)
-{
-}
-}

LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs (+0 -44)

@@ -1,44 +0,0 @@
-using Microsoft.SemanticKernel.AI.ChatCompletion;
-using Microsoft.SemanticKernel.Orchestration;
-using System.Runtime.CompilerServices;
-using System.Text;
-
-namespace LLamaSharp.SemanticKernel.ChatCompletion;
-
-internal sealed class LLamaSharpChatResult : IChatResult, IChatStreamingResult
-{
-private readonly ModelResult _modelResult;
-private readonly IAsyncEnumerable<string> _stream;
-
-/// <summary>
-///
-/// </summary>
-/// <param name="stream"></param>
-public LLamaSharpChatResult(IAsyncEnumerable<string> stream)
-{
-_stream = stream;
-this._modelResult = new ModelResult(stream);
-}
-
-public ModelResult ModelResult => this._modelResult;
-
-/// <inheritdoc/>
-public async Task<ChatMessage> GetChatMessageAsync(CancellationToken cancellationToken = default)
-{
-var sb = new StringBuilder();
-await foreach (var token in _stream)
-{
-sb.Append(token);
-}
-return await Task.FromResult(new LLamaSharpChatMessage(AuthorRole.Assistant, sb.ToString())).ConfigureAwait(false);
-}
-
-/// <inheritdoc/>
-public async IAsyncEnumerable<ChatMessage> GetStreamingChatMessageAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
-{
-await foreach (var token in _stream)
-{
-yield return new LLamaSharpChatMessage(AuthorRole.Assistant, token);
-}
-}
-}

LLama.SemanticKernel/ExtensionMethods.cs (+1 -2)

@@ -1,6 +1,5 @@
 using LLamaSharp.SemanticKernel.ChatCompletion;
-using Microsoft.SemanticKernel.AI.ChatCompletion;
-
+using Microsoft.SemanticKernel.ChatCompletion;
 namespace LLamaSharp.SemanticKernel;
 
 public static class ExtensionMethods


LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj (+2 -1)

@@ -30,10 +30,11 @@
 <Platforms>AnyCPU;x64;Arm64</Platforms>
 <PackageId>LLamaSharp.semantic-kernel</PackageId>
 <Configurations>Debug;Release;GPU</Configurations>
+<NoWarn>SKEXP0001,SKEXP0052</NoWarn>
 </PropertyGroup>
 
 <ItemGroup>
-<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.0.0-beta8" />
+<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.0.1" />
 </ItemGroup>
 
 <ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">


LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs (+25 -15)

@@ -1,37 +1,47 @@
 using LLama.Abstractions;
 using LLamaSharp.SemanticKernel.ChatCompletion;
-using Microsoft.SemanticKernel.AI;
-using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.Services;
+using Microsoft.SemanticKernel.TextGeneration;
 using System.Runtime.CompilerServices;
+using System.Text;
 
 namespace LLamaSharp.SemanticKernel.TextCompletion;
 
-public sealed class LLamaSharpTextCompletion : ITextCompletion
+public sealed class LLamaSharpTextCompletion : ITextGenerationService
 {
 public ILLamaExecutor executor;
 
-private readonly Dictionary<string, string> _attributes = new();
+private readonly Dictionary<string, object?> _attributes = new();
 
-public IReadOnlyDictionary<string, string> Attributes => this._attributes;
+public IReadOnlyDictionary<string, object?> Attributes => this._attributes;
 
 public LLamaSharpTextCompletion(ILLamaExecutor executor)
 {
 this.executor = executor;
 }
 
-public async Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, AIRequestSettings? requestSettings, CancellationToken cancellationToken = default)
+/// <inheritdoc/>
+public async Task<IReadOnlyList<TextContent>> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
 {
-var settings = ChatRequestSettings.FromRequestSettings(requestSettings);
-var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
-return await Task.FromResult(new List<ITextResult> { new LLamaTextResult(result) }.AsReadOnly()).ConfigureAwait(false);
+var settings = ChatRequestSettings.FromRequestSettings(executionSettings);
+var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
+var sb = new StringBuilder();
+await foreach (var token in result)
+{
+sb.Append(token);
+}
+return new List<TextContent> { new(sb.ToString()) };
 }
 
-#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously.
-public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, AIRequestSettings? requestSettings,[EnumeratorCancellation] CancellationToken cancellationToken = default)
-#pragma warning restore CS1998
+/// <inheritdoc/>
+public async IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
 {
-var settings = ChatRequestSettings.FromRequestSettings(requestSettings);
-var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
-yield return new LLamaTextResult(result);
+var settings = ChatRequestSettings.FromRequestSettings(executionSettings);
+var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
+await foreach (var token in result)
+{
+yield return new StreamingTextContent(token);
+}
 }
 }

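The same buffering/streaming split applies to text generation. A sketch of both entry points, assuming service is an LLamaSharpTextCompletion constructed elsewhere:

    // Buffered: a single TextContent with the whole completion.
    var texts = await service.GetTextContentsAsync("Hello");
    Console.WriteLine(texts[0].Text);

    // Streamed: one StreamingTextContent per token.
    await foreach (var chunk in service.GetStreamingTextContentsAsync("Hello"))
    {
        Console.Write(chunk.Text);
    }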
LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs (+0 -37)

@@ -1,37 +0,0 @@
-using Microsoft.SemanticKernel.AI.TextCompletion;
-using Microsoft.SemanticKernel.Orchestration;
-using System.Runtime.CompilerServices;
-using System.Text;
-
-namespace LLamaSharp.SemanticKernel.TextCompletion;
-
-internal sealed class LLamaTextResult : ITextResult, ITextStreamingResult
-{
-private readonly IAsyncEnumerable<string> _text;
-
-public LLamaTextResult(IAsyncEnumerable<string> text)
-{
-_text = text;
-ModelResult = new(text);
-}
-
-public ModelResult ModelResult { get; }
-
-public async Task<string> GetCompletionAsync(CancellationToken cancellationToken = default)
-{
-var sb = new StringBuilder();
-await foreach (var token in _text)
-{
-sb.Append(token);
-}
-return await Task.FromResult(sb.ToString()).ConfigureAwait(false);
-}
-
-public async IAsyncEnumerable<string> GetCompletionStreamingAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
-{
-await foreach (string word in _text)
-{
-yield return word;
-}
-}
-}

LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs (+6 -5)

@@ -1,15 +1,16 @@
 using LLama;
-using Microsoft.SemanticKernel.AI.Embeddings;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.Embeddings;
 
 namespace LLamaSharp.SemanticKernel.TextEmbedding;
 
-public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration
+public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGenerationService
 {
 private LLamaEmbedder _embedder;
 
-private readonly Dictionary<string, string> _attributes = new();
+private readonly Dictionary<string, object?> _attributes = new();
 
-public IReadOnlyDictionary<string, string> Attributes => this._attributes;
+public IReadOnlyDictionary<string, object?> Attributes => this._attributes;
 
 public LLamaSharpEmbeddingGeneration(LLamaEmbedder embedder)
 {
@@ -17,7 +18,7 @@ public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration
 }
 
 /// <inheritdoc/>
-public async Task<IList<ReadOnlyMemory<float>>> GenerateEmbeddingsAsync(IList<string> data, CancellationToken cancellationToken = default)
+public async Task<IList<ReadOnlyMemory<float>>> GenerateEmbeddingsAsync(IList<string> data, Kernel? kernel = null, CancellationToken cancellationToken = default)
 {
 var embeddings = data.Select(text => new ReadOnlyMemory<float>(_embedder.GetEmbeddings(text))).ToList();
 return await Task.FromResult(embeddings);

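The only behavioral change here is the interface rename and the optional Kernel parameter that SK 1.0 threads through service calls; callers can simply omit it:

    // Sketch: "embedder" is an existing LLamaEmbedder.
    var generation = new LLamaSharpEmbeddingGeneration(embedder);
    IList<ReadOnlyMemory<float>> vectors = await generation.GenerateEmbeddingsAsync(new[] { "hello" });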

LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs (+8 -8)

@@ -1,5 +1,5 @@
 using LLamaSharp.SemanticKernel.ChatCompletion;
-using Microsoft.SemanticKernel.AI;
+using Microsoft.SemanticKernel;
 
 namespace LLama.Unittest.SemanticKernel
 {
@@ -75,9 +75,9 @@ namespace LLama.Unittest.SemanticKernel
 public void ChatRequestSettings_FromAIRequestSettings()
 {
 // Arrange
-var originalRequestSettings = new AIRequestSettings()
+var originalRequestSettings = new PromptExecutionSettings()
 {
-ServiceId = "test",
+ModelId = "test",
 };
 
 // Act
@@ -85,16 +85,16 @@ namespace LLama.Unittest.SemanticKernel
 
 // Assert
 Assert.NotNull(requestSettings);
-Assert.Equal(originalRequestSettings.ServiceId, requestSettings.ServiceId);
+Assert.Equal(originalRequestSettings.ModelId, requestSettings.ModelId);
 }
 
 [Fact]
 public void ChatRequestSettings_FromAIRequestSettingsWithExtraPropertiesInSnakeCase()
 {
 // Arrange
-var originalRequestSettings = new AIRequestSettings()
+var originalRequestSettings = new PromptExecutionSettings()
 {
-ServiceId = "test",
+ModelId = "test",
 ExtensionData = new Dictionary<string, object>
 {
 { "frequency_penalty", 0.5 },
@@ -131,9 +131,9 @@ namespace LLama.Unittest.SemanticKernel
 public void ChatRequestSettings_FromAIRequestSettingsWithExtraPropertiesInPascalCase()
 {
 // Arrange
-var originalRequestSettings = new AIRequestSettings()
+var originalRequestSettings = new PromptExecutionSettings()
 {
-ServiceId = "test",
+ModelId = "test",
 ExtensionData = new Dictionary<string, object>
 {
 { "FrequencyPenalty", 0.5 },


LLama/LLamaEmbedder.cs (+3 -23)

@@ -20,30 +20,9 @@ namespace LLama
 public int EmbeddingSize => _ctx.EmbeddingSize;
 
 /// <summary>
-/// Create a new embedder (loading temporary weights)
+/// LLama Context
 /// </summary>
-/// <param name="allParams"></param>
-/// <param name="logger"></param>
-[Obsolete("Preload LLamaWeights and use the constructor which accepts them")]
-public LLamaEmbedder(ILLamaParams allParams, ILogger? logger = null)
-    : this(allParams, allParams, logger)
-{
-}
-
-/// <summary>
-/// Create a new embedder (loading temporary weights)
-/// </summary>
-/// <param name="modelParams"></param>
-/// <param name="contextParams"></param>
-/// <param name="logger"></param>
-[Obsolete("Preload LLamaWeights and use the constructor which accepts them")]
-public LLamaEmbedder(IModelParams modelParams, IContextParams contextParams, ILogger? logger = null)
-{
-using var weights = LLamaWeights.LoadFromFile(modelParams);
-
-contextParams.EmbeddingMode = true;
-_ctx = weights.CreateContext(contextParams, logger);
-}
+public LLamaContext Context => this._ctx;
 
 /// <summary>
 /// Create a new embedder, using the given LLamaWeights
@@ -117,5 +96,8 @@ namespace LLama
 {
 _ctx.Dispose();
 }
+
 }
 }

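Exposing Context publicly is what lets the Kernel Memory generators above implement CountTokens via embedder.Context.Tokenize. A sketch, with a placeholder model path:

    // Sketch: counting tokens through the embedder's now-public context.
    var parameters = new ModelParams("model.gguf"); // placeholder path
    using var weights = LLamaWeights.LoadFromFile(parameters);
    using var embedder = new LLamaEmbedder(weights, parameters);
    int count = embedder.Context.Tokenize("hello world").Length;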