diff --git a/LLama.Examples/Examples/KernelMemory.cs b/LLama.Examples/Examples/KernelMemory.cs index 0aea3d7a..a204600b 100644 --- a/LLama.Examples/Examples/KernelMemory.cs +++ b/LLama.Examples/Examples/KernelMemory.cs @@ -16,6 +16,11 @@ namespace LLama.Examples.Examples Console.WriteLine("Example from: https://github.com/microsoft/kernel-memory/blob/main/examples/101-using-core-nuget/Program.cs"); Console.Write("Please input your model path: "); var modelPath = Console.ReadLine(); + var searchClientConfig = new SearchClientConfig + { + MaxMatchesCount = 1, + AnswerTokens = 100, + }; var memory = new KernelMemoryBuilder() .WithLLamaSharpDefaults(new LLamaSharpConfig(modelPath) { @@ -24,6 +29,7 @@ namespace LLama.Examples.Examples AntiPrompts = new List { "\n\n" } } }) + .WithSearchClientConfig(searchClientConfig) .With(new TextPartitioningOptions { MaxTokensPerParagraph = 300, diff --git a/LLama.Examples/Examples/SemanticKernelChat.cs b/LLama.Examples/Examples/SemanticKernelChat.cs index 39870f1b..52324eed 100644 --- a/LLama.Examples/Examples/SemanticKernelChat.cs +++ b/LLama.Examples/Examples/SemanticKernelChat.cs @@ -2,6 +2,7 @@ using LLama.Common; using Microsoft.SemanticKernel.AI.ChatCompletion; using LLamaSharp.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.ChatCompletion; namespace LLama.Examples.Examples { @@ -9,7 +10,7 @@ namespace LLama.Examples.Examples { public static async Task Run() { - Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md"); + Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs"); Console.Write("Please input your model path: "); var modelPath = Console.ReadLine(); @@ -29,8 +30,8 @@ namespace LLama.Examples.Examples await MessageOutputAsync(chatHistory); // First bot assistant message - string reply = await chatGPT.GenerateMessageAsync(chatHistory); - chatHistory.AddAssistantMessage(reply); + var reply = await chatGPT.GetChatMessageContentAsync(chatHistory); + chatHistory.AddAssistantMessage(reply.Content); await MessageOutputAsync(chatHistory); // Second user message @@ -38,15 +39,15 @@ namespace LLama.Examples.Examples await MessageOutputAsync(chatHistory); // Second bot assistant message - reply = await chatGPT.GenerateMessageAsync(chatHistory); - chatHistory.AddAssistantMessage(reply); + reply = await chatGPT.GetChatMessageContentAsync(chatHistory); + chatHistory.AddAssistantMessage(reply.Content); await MessageOutputAsync(chatHistory); } /// /// Outputs the last message of the chat history /// - private static Task MessageOutputAsync(Microsoft.SemanticKernel.AI.ChatCompletion.ChatHistory chatHistory) + private static Task MessageOutputAsync(Microsoft.SemanticKernel.ChatCompletion.ChatHistory chatHistory) { var message = chatHistory.Last(); diff --git a/LLama.Examples/Examples/SemanticKernelPrompt.cs b/LLama.Examples/Examples/SemanticKernelPrompt.cs index c4974d67..40838e8b 100644 --- a/LLama.Examples/Examples/SemanticKernelPrompt.cs +++ b/LLama.Examples/Examples/SemanticKernelPrompt.cs @@ -2,8 +2,9 @@ using LLama.Common; using LLamaSharp.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.AI.TextCompletion; using LLamaSharp.SemanticKernel.TextCompletion; +using Microsoft.SemanticKernel.TextGeneration; +using Microsoft.Extensions.DependencyInjection; namespace LLama.Examples.Examples { @@ -11,7 +12,7 @@ namespace LLama.Examples.Examples { public static async Task Run() { - Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs"); + Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md"); Console.Write("Please input your model path: "); var modelPath = Console.ReadLine(); @@ -20,8 +21,8 @@ namespace LLama.Examples.Examples using var model = LLamaWeights.LoadFromFile(parameters); var ex = new StatelessExecutor(model, parameters); - var builder = new KernelBuilder(); - builder.WithAIService("local-llama", new LLamaSharpTextCompletion(ex), true); + var builder = Kernel.CreateBuilder(); + builder.Services.AddKeyedSingleton("local-llama", new LLamaSharpTextCompletion(ex)); var kernel = builder.Build(); @@ -29,8 +30,8 @@ namespace LLama.Examples.Examples One line TLDR with the fewest words."; - ChatRequestSettings settings = new() {MaxTokens = 100}; - var summarize = kernel.CreateSemanticFunction(prompt, requestSettings: settings); + ChatRequestSettings settings = new() { MaxTokens = 100 }; + var summarize = kernel.CreateFunctionFromPrompt(prompt, settings); string text1 = @" 1st Law of Thermodynamics - Energy cannot be created or destroyed. @@ -42,10 +43,9 @@ One line TLDR with the fewest words."; 2. The acceleration of an object depends on the mass of the object and the amount of force applied. 3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite on the first."; - Console.WriteLine((await kernel.RunAsync(text1, summarize)).GetValue()); + Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text1 })).GetValue()); - Console.WriteLine((await kernel.RunAsync(text2, summarize)).GetValue()); + Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text2 })).GetValue()); } } } - \ No newline at end of file diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj index eb70e218..6f6ae4dc 100644 --- a/LLama.Examples/LLama.Examples.csproj +++ b/LLama.Examples/LLama.Examples.csproj @@ -1,4 +1,4 @@ - + Exe @@ -9,28 +9,15 @@ true true - - - - 1701;1702;8604 - - - - 1701;1702;8604 - - - - 1701;1702;8604 - - - - 1701;1702;8604 + 12 + 1701;1702;8604;SKEXP0001;SKEXP0052;SKEXP0003 - - + + + diff --git a/LLama.KernelMemory/BuilderExtensions.cs b/LLama.KernelMemory/BuilderExtensions.cs index 5f476ce8..7afac4bb 100644 --- a/LLama.KernelMemory/BuilderExtensions.cs +++ b/LLama.KernelMemory/BuilderExtensions.cs @@ -17,19 +17,6 @@ namespace LLamaSharp.KernelMemory public static class BuilderExtensions { - private static IKernelMemoryBuilder WithCustomEmbeddingGeneration(this IKernelMemoryBuilder builder, ITextEmbeddingGeneration embeddingGeneration) - { - builder.AddSingleton(embeddingGeneration); - builder.AddIngestionEmbeddingGenerator(embeddingGeneration); - return builder; - } - - private static IKernelMemoryBuilder WithCustomTextGeneration(this IKernelMemoryBuilder builder, ITextGeneration textGeneration) - { - builder.AddSingleton(textGeneration); - return builder; - } - /// /// Adds LLamaSharpTextEmbeddingGeneration to the KernelMemoryBuilder. /// @@ -38,7 +25,9 @@ namespace LLamaSharp.KernelMemory /// The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration added. public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpConfig config) { - builder.WithCustomEmbeddingGeneration(new LLamaSharpTextEmbeddingGeneration(config)); + var generator = new LLamaSharpTextEmbeddingGenerator(config); + builder.AddSingleton(generator); + builder.AddIngestionEmbeddingGenerator(generator); return builder; } @@ -46,11 +35,12 @@ namespace LLamaSharp.KernelMemory /// Adds LLamaSharpTextEmbeddingGeneration to the KernelMemoryBuilder. /// /// The KernelMemoryBuilder instance. - /// The LLamaSharpTextEmbeddingGeneration instance. + /// The LLamaSharpTextEmbeddingGeneration instance. /// The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration added. - public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpTextEmbeddingGeneration textEmbeddingGeneration) + public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpTextEmbeddingGenerator textEmbeddingGenerator) { - builder.WithCustomEmbeddingGeneration(textEmbeddingGeneration); + builder.AddSingleton(textEmbeddingGenerator); + builder.AddIngestionEmbeddingGenerator(textEmbeddingGenerator); return builder; } @@ -62,7 +52,7 @@ namespace LLamaSharp.KernelMemory /// The KernelMemoryBuilder instance with LLamaSharpTextGeneration added. public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LLamaSharpConfig config) { - builder.WithCustomTextGeneration(new LlamaSharpTextGeneration(config)); + builder.AddSingleton(new LlamaSharpTextGenerator(config)); return builder; } @@ -70,11 +60,11 @@ namespace LLamaSharp.KernelMemory /// Adds LLamaSharpTextGeneration to the KernelMemoryBuilder. /// /// The KernelMemoryBuilder instance. - /// The LlamaSharpTextGeneration instance. + /// The LlamaSharpTextGeneration instance. /// The KernelMemoryBuilder instance with LLamaSharpTextGeneration added. - public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LlamaSharpTextGeneration textGeneration) + public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LlamaSharpTextGenerator textGenerator) { - builder.WithCustomTextGeneration(textGeneration); + builder.AddSingleton(textGenerator); return builder; } @@ -96,8 +86,8 @@ namespace LLamaSharp.KernelMemory var context = weights.CreateContext(parameters); var executor = new StatelessExecutor(weights, parameters); var embedder = new LLamaEmbedder(weights, parameters); - builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGeneration(embedder)); - builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGeneration(weights, context, executor, config?.DefaultInferenceParams)); + builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGenerator(embedder)); + builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGenerator(weights, context, executor, config?.DefaultInferenceParams)); return builder; } } diff --git a/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj b/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj index 3867b7e1..a9bb5073 100644 --- a/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj +++ b/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj @@ -4,8 +4,6 @@ net6.0;net7.0;net8.0 enable enable - - 0.7.1 0.8.0 Xbotter SciSharp STACK @@ -29,7 +27,7 @@ - + diff --git a/LLama.KernelMemory/LLamaSharpTextEmbeddingGeneration.cs b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs similarity index 71% rename from LLama.KernelMemory/LLamaSharpTextEmbeddingGeneration.cs rename to LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs index 4421ed8a..8148adc8 100644 --- a/LLama.KernelMemory/LLamaSharpTextEmbeddingGeneration.cs +++ b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs @@ -1,6 +1,8 @@ using LLama; using LLama.Abstractions; using LLama.Common; +using Microsoft.KernelMemory; +using Microsoft.KernelMemory.AI; using Microsoft.SemanticKernel.AI.Embeddings; using System; using System.Collections.Generic; @@ -13,22 +15,23 @@ namespace LLamaSharp.KernelMemory /// /// Provides text embedding generation for LLamaSharp. /// - public class LLamaSharpTextEmbeddingGeneration : ITextEmbeddingGeneration, IDisposable + public class LLamaSharpTextEmbeddingGenerator + : ITextEmbeddingGenerator, IDisposable { private readonly LLamaSharpConfig? _config; private readonly LLamaWeights? _weights; private readonly LLamaEmbedder _embedder; private bool _ownsEmbedder = false; private bool _ownsWeights = false; - private readonly Dictionary _attributes = new(); - public IReadOnlyDictionary Attributes => this._attributes; + /// + public int MaxTokens => (int?)_config?.ContextSize ?? 2048; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// The configuration for LLamaSharp. - public LLamaSharpTextEmbeddingGeneration(LLamaSharpConfig config) + public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config) { this._config = config; var @params = new ModelParams(_config.ModelPath); @@ -39,11 +42,11 @@ namespace LLamaSharp.KernelMemory } /// - /// Initializes a new instance of the class from reused weights. + /// Initializes a new instance of the class from reused weights. /// /// The configuration for LLamaSharp. /// A LLamaWeights object. - public LLamaSharpTextEmbeddingGeneration(LLamaSharpConfig config, LLamaWeights weights) + public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights weights) { this._config = config; var @params = new ModelParams(_config.ModelPath); @@ -53,10 +56,10 @@ namespace LLamaSharp.KernelMemory } /// - /// Initializes a new instance of the class from reused embedder. + /// Initializes a new instance of the class from reused embedder. /// /// A LLamaEmbedder object. - public LLamaSharpTextEmbeddingGeneration(LLamaEmbedder embedder) + public LLamaSharpTextEmbeddingGenerator(LLamaEmbedder embedder) { this._config = null; this._weights = null; @@ -89,5 +92,15 @@ namespace LLamaSharp.KernelMemory return Task.FromResult(results); } + + /// + public Task GenerateEmbeddingAsync(string text, CancellationToken cancellationToken = default) + { + var embeddings = _embedder.GetEmbeddings(text); + return Task.FromResult(new Embedding(embeddings)); + } + + /// + public int CountTokens(string text) => _embedder.Context.Tokenize(text).Length; } } diff --git a/LLama.KernelMemory/LlamaSharpTextGeneration.cs b/LLama.KernelMemory/LlamaSharpTextGenerator.cs similarity index 86% rename from LLama.KernelMemory/LlamaSharpTextGeneration.cs rename to LLama.KernelMemory/LlamaSharpTextGenerator.cs index 663a77cf..7269152b 100644 --- a/LLama.KernelMemory/LlamaSharpTextGeneration.cs +++ b/LLama.KernelMemory/LlamaSharpTextGenerator.cs @@ -13,7 +13,7 @@ namespace LLamaSharp.KernelMemory /// /// Provides text generation for LLamaSharp. /// - public class LlamaSharpTextGeneration : ITextGeneration, IDisposable + public class LlamaSharpTextGenerator : ITextGenerator, IDisposable { private readonly LLamaWeights _weights; private readonly StatelessExecutor _executor; @@ -22,11 +22,13 @@ namespace LLamaSharp.KernelMemory private bool _ownsContext = false; private bool _ownsWeights = false; + public int MaxTokenTotal { get; } + /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// The configuration for LLamaSharp. - public LlamaSharpTextGeneration(LLamaSharpConfig config) + public LlamaSharpTextGenerator(LLamaSharpConfig config) { var parameters = new ModelParams(config.ModelPath) { @@ -39,21 +41,23 @@ namespace LLamaSharp.KernelMemory _executor = new StatelessExecutor(_weights, parameters); _defaultInferenceParams = config?.DefaultInferenceParams; _ownsWeights = _ownsContext = true; + MaxTokenTotal = (int)parameters.ContextSize; } /// - /// Initializes a new instance of the class from reused weights, context and executor. + /// Initializes a new instance of the class from reused weights, context and executor. /// If executor is not specified, then a StatelessExecutor will be created with `context.Params`. So far only `StatelessExecutor` is expected. /// /// A LLamaWeights object. /// A LLamaContext object. /// An executor. Currently only StatelessExecutor is expected. - public LlamaSharpTextGeneration(LLamaWeights weights, LLamaContext context, StatelessExecutor? executor = null, InferenceParams? inferenceParams = null) + public LlamaSharpTextGenerator(LLamaWeights weights, LLamaContext context, StatelessExecutor? executor = null, InferenceParams? inferenceParams = null) { _weights = weights; _context = context; _executor = executor ?? new StatelessExecutor(_weights, _context.Params); _defaultInferenceParams = inferenceParams; + MaxTokenTotal = (int)_context.Params.ContextSize; } /// @@ -102,5 +106,8 @@ namespace LLamaSharp.KernelMemory }; } } + + /// + public int CountTokens(string text) => _context.Tokenize(text).Length; } } diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs index e04ee9e4..ac22e1fc 100644 --- a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs +++ b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs @@ -1,10 +1,10 @@ -using Microsoft.SemanticKernel.AI; +using Microsoft.SemanticKernel; using System.Text.Json; using System.Text.Json.Serialization; namespace LLamaSharp.SemanticKernel.ChatCompletion; -public class ChatRequestSettings : AIRequestSettings +public class ChatRequestSettings : PromptExecutionSettings { /// /// Temperature controls the randomness of the completion. @@ -68,7 +68,7 @@ public class ChatRequestSettings : AIRequestSettings /// Template configuration /// Default max tokens /// An instance of OpenAIRequestSettings - public static ChatRequestSettings FromRequestSettings(AIRequestSettings? requestSettings, int? defaultMaxTokens = null) + public static ChatRequestSettings FromRequestSettings(PromptExecutionSettings? requestSettings, int? defaultMaxTokens = null) { if (requestSettings is null) { diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs index f0d3a430..e320ea3f 100644 --- a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs +++ b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs @@ -31,6 +31,10 @@ public class ChatRequestSettingsConverter : JsonConverter switch (propertyName) { + case "MODELID": + case "MODEL_ID": + requestSettings.ModelId = reader.GetString(); + break; case "TEMPERATURE": requestSettings.Temperature = reader.GetDouble(); break; @@ -62,10 +66,6 @@ public class ChatRequestSettingsConverter : JsonConverter case "TOKEN_SELECTION_BIASES": requestSettings.TokenSelectionBiases = JsonSerializer.Deserialize>(ref reader, options) ?? new Dictionary(); break; - case "SERVICEID": - case "SERVICE_ID": - requestSettings.ServiceId = reader.GetString(); - break; default: reader.Skip(); break; @@ -98,7 +98,6 @@ public class ChatRequestSettingsConverter : JsonConverter writer.WriteNumber("results_per_prompt", value.ResultsPerPrompt); writer.WritePropertyName("token_selection_biases"); JsonSerializer.Serialize(writer, value.TokenSelectionBiases, options); - writer.WriteString("service_id", value.ServiceId); writer.WriteEndObject(); } diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs index 7e5425bb..b1c0d347 100644 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs +++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs @@ -1,8 +1,12 @@ using LLama; using LLama.Abstractions; -using Microsoft.SemanticKernel.AI; -using Microsoft.SemanticKernel.AI.ChatCompletion; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.Services; +using System; +using System.IO; using System.Runtime.CompilerServices; +using System.Text; using static LLama.LLamaTransforms; namespace LLamaSharp.SemanticKernel.ChatCompletion; @@ -10,16 +14,16 @@ namespace LLamaSharp.SemanticKernel.ChatCompletion; /// /// LLamaSharp ChatCompletion /// -public sealed class LLamaSharpChatCompletion : IChatCompletion +public sealed class LLamaSharpChatCompletion : IChatCompletionService { private readonly StatelessExecutor _model; private ChatRequestSettings defaultRequestSettings; private readonly IHistoryTransform historyTransform; private readonly ITextStreamTransform outputTransform; - private readonly Dictionary _attributes = new(); + private readonly Dictionary _attributes = new(); - public IReadOnlyDictionary Attributes => this._attributes; + public IReadOnlyDictionary Attributes => this._attributes; static ChatRequestSettings GetDefaultSettings() { @@ -45,7 +49,6 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion $"{LLama.Common.AuthorRole.System}:"}); } - /// public ChatHistory CreateNewChat(string? instructions = "") { var history = new ChatHistory(); @@ -59,30 +62,41 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion } /// - public Task> GetChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, CancellationToken cancellationToken = default) + public async Task> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) { - var settings = requestSettings != null - ? ChatRequestSettings.FromRequestSettings(requestSettings) - : defaultRequestSettings; - var prompt = historyTransform.HistoryToText(chat.ToLLamaSharpChatHistory()); + var settings = executionSettings != null + ? ChatRequestSettings.FromRequestSettings(executionSettings) + : defaultRequestSettings; + var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory()); var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken); - return Task.FromResult>(new List { new LLamaSharpChatResult(outputTransform.TransformAsync(result)) }.AsReadOnly()); + var output = outputTransform.TransformAsync(result); + + var sb = new StringBuilder(); + await foreach (var token in output) + { + sb.Append(token); + } + + return new List { new(AuthorRole.Assistant, sb.ToString()) }.AsReadOnly(); } /// -#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously. - public async IAsyncEnumerable GetStreamingChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) -#pragma warning restore CS1998 + public async IAsyncEnumerable GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - var settings = requestSettings != null - ? ChatRequestSettings.FromRequestSettings(requestSettings) - : defaultRequestSettings; - var prompt = historyTransform.HistoryToText(chat.ToLLamaSharpChatHistory()); - // This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable. + var settings = executionSettings != null + ? ChatRequestSettings.FromRequestSettings(executionSettings) + : defaultRequestSettings; + var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory()); + var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken); - yield return new LLamaSharpChatResult(outputTransform.TransformAsync(result)); + var output = outputTransform.TransformAsync(result); + + await foreach (var token in output) + { + yield return new StreamingChatMessageContent(AuthorRole.Assistant, token); + } } } diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs deleted file mode 100644 index 1069feda..00000000 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs +++ /dev/null @@ -1,14 +0,0 @@ -using Microsoft.SemanticKernel.AI.ChatCompletion; - -namespace LLamaSharp.SemanticKernel.ChatCompletion; - -/// -/// LLamaSharp Chat Message -/// -public class LLamaSharpChatMessage : ChatMessage -{ - /// - public LLamaSharpChatMessage(AuthorRole role, string content) : base(role, content) - { - } -} diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs deleted file mode 100644 index 07c3ac17..00000000 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs +++ /dev/null @@ -1,44 +0,0 @@ -using Microsoft.SemanticKernel.AI.ChatCompletion; -using Microsoft.SemanticKernel.Orchestration; -using System.Runtime.CompilerServices; -using System.Text; - -namespace LLamaSharp.SemanticKernel.ChatCompletion; - -internal sealed class LLamaSharpChatResult : IChatResult, IChatStreamingResult -{ - private readonly ModelResult _modelResult; - private readonly IAsyncEnumerable _stream; - - /// - /// - /// - /// - public LLamaSharpChatResult(IAsyncEnumerable stream) - { - _stream = stream; - this._modelResult = new ModelResult(stream); - } - - public ModelResult ModelResult => this._modelResult; - - /// - public async Task GetChatMessageAsync(CancellationToken cancellationToken = default) - { - var sb = new StringBuilder(); - await foreach (var token in _stream) - { - sb.Append(token); - } - return await Task.FromResult(new LLamaSharpChatMessage(AuthorRole.Assistant, sb.ToString())).ConfigureAwait(false); - } - - /// - public async IAsyncEnumerable GetStreamingChatMessageAsync([EnumeratorCancellation] CancellationToken cancellationToken = default) - { - await foreach (var token in _stream) - { - yield return new LLamaSharpChatMessage(AuthorRole.Assistant, token); - } - } -} diff --git a/LLama.SemanticKernel/ExtensionMethods.cs b/LLama.SemanticKernel/ExtensionMethods.cs index 6f39e373..85f9064c 100644 --- a/LLama.SemanticKernel/ExtensionMethods.cs +++ b/LLama.SemanticKernel/ExtensionMethods.cs @@ -1,6 +1,5 @@ using LLamaSharp.SemanticKernel.ChatCompletion; -using Microsoft.SemanticKernel.AI.ChatCompletion; - +using Microsoft.SemanticKernel.ChatCompletion; namespace LLamaSharp.SemanticKernel; public static class ExtensionMethods diff --git a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj index f787ac50..2f365924 100644 --- a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj +++ b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj @@ -30,10 +30,11 @@ AnyCPU;x64;Arm64 LLamaSharp.semantic-kernel Debug;Release;GPU + SKEXP0001,SKEXP0052 - + diff --git a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs index 059a9ff3..08ec33e1 100644 --- a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs +++ b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs @@ -1,37 +1,47 @@ using LLama.Abstractions; using LLamaSharp.SemanticKernel.ChatCompletion; -using Microsoft.SemanticKernel.AI; -using Microsoft.SemanticKernel.AI.TextCompletion; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Services; +using Microsoft.SemanticKernel.TextGeneration; using System.Runtime.CompilerServices; +using System.Text; namespace LLamaSharp.SemanticKernel.TextCompletion; -public sealed class LLamaSharpTextCompletion : ITextCompletion +public sealed class LLamaSharpTextCompletion : ITextGenerationService { public ILLamaExecutor executor; - private readonly Dictionary _attributes = new(); + private readonly Dictionary _attributes = new(); - public IReadOnlyDictionary Attributes => this._attributes; + public IReadOnlyDictionary Attributes => this._attributes; public LLamaSharpTextCompletion(ILLamaExecutor executor) { this.executor = executor; } - public async Task> GetCompletionsAsync(string text, AIRequestSettings? requestSettings, CancellationToken cancellationToken = default) + /// + public async Task> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) { - var settings = ChatRequestSettings.FromRequestSettings(requestSettings); - var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken); - return await Task.FromResult(new List { new LLamaTextResult(result) }.AsReadOnly()).ConfigureAwait(false); + var settings = ChatRequestSettings.FromRequestSettings(executionSettings); + var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken); + var sb = new StringBuilder(); + await foreach (var token in result) + { + sb.Append(token); + } + return new List { new(sb.ToString()) }; } -#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously. - public async IAsyncEnumerable GetStreamingCompletionsAsync(string text, AIRequestSettings? requestSettings,[EnumeratorCancellation] CancellationToken cancellationToken = default) -#pragma warning restore CS1998 + /// + public async IAsyncEnumerable GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - var settings = ChatRequestSettings.FromRequestSettings(requestSettings); - var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken); - yield return new LLamaTextResult(result); + var settings = ChatRequestSettings.FromRequestSettings(executionSettings); + var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken); + await foreach (var token in result) + { + yield return new StreamingTextContent(token); + } } } diff --git a/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs b/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs deleted file mode 100644 index b66013ba..00000000 --- a/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs +++ /dev/null @@ -1,37 +0,0 @@ -using Microsoft.SemanticKernel.AI.TextCompletion; -using Microsoft.SemanticKernel.Orchestration; -using System.Runtime.CompilerServices; -using System.Text; - -namespace LLamaSharp.SemanticKernel.TextCompletion; - -internal sealed class LLamaTextResult : ITextResult, ITextStreamingResult -{ - private readonly IAsyncEnumerable _text; - - public LLamaTextResult(IAsyncEnumerable text) - { - _text = text; - ModelResult = new(text); - } - - public ModelResult ModelResult { get; } - - public async Task GetCompletionAsync(CancellationToken cancellationToken = default) - { - var sb = new StringBuilder(); - await foreach (var token in _text) - { - sb.Append(token); - } - return await Task.FromResult(sb.ToString()).ConfigureAwait(false); - } - - public async IAsyncEnumerable GetCompletionStreamingAsync([EnumeratorCancellation] CancellationToken cancellationToken = default) - { - await foreach (string word in _text) - { - yield return word; - } - } -} diff --git a/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs index 155c5406..73ceb0f2 100644 --- a/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs +++ b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs @@ -1,15 +1,16 @@ using LLama; -using Microsoft.SemanticKernel.AI.Embeddings; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Embeddings; namespace LLamaSharp.SemanticKernel.TextEmbedding; -public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration +public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGenerationService { private LLamaEmbedder _embedder; - private readonly Dictionary _attributes = new(); + private readonly Dictionary _attributes = new(); - public IReadOnlyDictionary Attributes => this._attributes; + public IReadOnlyDictionary Attributes => this._attributes; public LLamaSharpEmbeddingGeneration(LLamaEmbedder embedder) { @@ -17,7 +18,7 @@ public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration } /// - public async Task>> GenerateEmbeddingsAsync(IList data, CancellationToken cancellationToken = default) + public async Task>> GenerateEmbeddingsAsync(IList data, Kernel? kernel = null, CancellationToken cancellationToken = default) { var embeddings = data.Select(text => new ReadOnlyMemory(_embedder.GetEmbeddings(text))).ToList(); return await Task.FromResult(embeddings); diff --git a/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs b/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs index 99881b57..ef5d9670 100644 --- a/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs +++ b/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs @@ -1,5 +1,5 @@ using LLamaSharp.SemanticKernel.ChatCompletion; -using Microsoft.SemanticKernel.AI; +using Microsoft.SemanticKernel; namespace LLama.Unittest.SemanticKernel { @@ -75,9 +75,9 @@ namespace LLama.Unittest.SemanticKernel public void ChatRequestSettings_FromAIRequestSettings() { // Arrange - var originalRequestSettings = new AIRequestSettings() + var originalRequestSettings = new PromptExecutionSettings() { - ServiceId = "test", + ModelId = "test", }; // Act @@ -85,16 +85,16 @@ namespace LLama.Unittest.SemanticKernel // Assert Assert.NotNull(requestSettings); - Assert.Equal(originalRequestSettings.ServiceId, requestSettings.ServiceId); + Assert.Equal(originalRequestSettings.ModelId, requestSettings.ModelId); } [Fact] public void ChatRequestSettings_FromAIRequestSettingsWithExtraPropertiesInSnakeCase() { // Arrange - var originalRequestSettings = new AIRequestSettings() + var originalRequestSettings = new PromptExecutionSettings() { - ServiceId = "test", + ModelId = "test", ExtensionData = new Dictionary { { "frequency_penalty", 0.5 }, @@ -131,9 +131,9 @@ namespace LLama.Unittest.SemanticKernel public void ChatRequestSettings_FromAIRequestSettingsWithExtraPropertiesInPascalCase() { // Arrange - var originalRequestSettings = new AIRequestSettings() + var originalRequestSettings = new PromptExecutionSettings() { - ServiceId = "test", + ModelId = "test", ExtensionData = new Dictionary { { "FrequencyPenalty", 0.5 }, diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs index ee23cd39..ab56280c 100644 --- a/LLama/LLamaEmbedder.cs +++ b/LLama/LLamaEmbedder.cs @@ -20,30 +20,9 @@ namespace LLama public int EmbeddingSize => _ctx.EmbeddingSize; /// - /// Create a new embedder (loading temporary weights) + /// LLama Context /// - /// - /// - [Obsolete("Preload LLamaWeights and use the constructor which accepts them")] - public LLamaEmbedder(ILLamaParams allParams, ILogger? logger = null) - : this(allParams, allParams, logger) - { - } - - /// - /// Create a new embedder (loading temporary weights) - /// - /// - /// - /// - [Obsolete("Preload LLamaWeights and use the constructor which accepts them")] - public LLamaEmbedder(IModelParams modelParams, IContextParams contextParams, ILogger? logger = null) - { - using var weights = LLamaWeights.LoadFromFile(modelParams); - - contextParams.EmbeddingMode = true; - _ctx = weights.CreateContext(contextParams, logger); - } + public LLamaContext Context => this._ctx; /// /// Create a new embedder, using the given LLamaWeights @@ -117,5 +96,6 @@ namespace LLama { _ctx.Dispose(); } + } }