From 13a312b4ecb7f1ca65650b2b275b868271d4ec88 Mon Sep 17 00:00:00 2001 From: xbotter Date: Mon, 11 Dec 2023 19:39:01 +0800 Subject: [PATCH 1/4] update sk to 1.0.0-rc3 & km to 0.18 --- LLama.Examples/Examples/KernelMemory.cs | 6 +++ LLama.Examples/Examples/SemanticKernelChat.cs | 4 +- .../Examples/SemanticKernelPrompt.cs | 14 ++--- LLama.Examples/LLama.Examples.csproj | 24 ++------- LLama.KernelMemory/BuilderExtensions.cs | 36 +++++-------- .../LLamaSharp.KernelMemory.csproj | 4 +- ...cs => LLamaSharpTextEmbeddingGenerator.cs} | 31 +++++++---- ...neration.cs => LlamaSharpTextGenerator.cs} | 17 +++++-- .../ChatCompletion/ChatRequestSettings.cs | 4 +- .../LLamaSharpChatCompletion.cs | 51 ++++++++++++------- .../ChatCompletion/LLamaSharpChatMessage.cs | 14 ----- .../ChatCompletion/LLamaSharpChatResult.cs | 44 ---------------- .../LLamaSharp.SemanticKernel.csproj | 3 +- .../LLamaSharpTextCompletion.cs | 37 +++++++++----- .../TextCompletion/LLamaTextResult.cs | 37 -------------- .../LLamaSharpEmbeddingGeneration.cs | 7 +-- .../ChatRequestSettingsTests.cs | 6 +-- LLama/LLamaEmbedder.cs | 8 +++ 18 files changed, 146 insertions(+), 201 deletions(-) rename LLama.KernelMemory/{LLamaSharpTextEmbeddingGeneration.cs => LLamaSharpTextEmbeddingGenerator.cs} (72%) rename LLama.KernelMemory/{LlamaSharpTextGeneration.cs => LlamaSharpTextGenerator.cs} (86%) delete mode 100644 LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs delete mode 100644 LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs delete mode 100644 LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs diff --git a/LLama.Examples/Examples/KernelMemory.cs b/LLama.Examples/Examples/KernelMemory.cs index 0aea3d7a..a204600b 100644 --- a/LLama.Examples/Examples/KernelMemory.cs +++ b/LLama.Examples/Examples/KernelMemory.cs @@ -16,6 +16,11 @@ namespace LLama.Examples.Examples Console.WriteLine("Example from: https://github.com/microsoft/kernel-memory/blob/main/examples/101-using-core-nuget/Program.cs"); Console.Write("Please input your model path: "); var modelPath = Console.ReadLine(); + var searchClientConfig = new SearchClientConfig + { + MaxMatchesCount = 1, + AnswerTokens = 100, + }; var memory = new KernelMemoryBuilder() .WithLLamaSharpDefaults(new LLamaSharpConfig(modelPath) { @@ -24,6 +29,7 @@ namespace LLama.Examples.Examples AntiPrompts = new List { "\n\n" } } }) + .WithSearchClientConfig(searchClientConfig) .With(new TextPartitioningOptions { MaxTokensPerParagraph = 300, diff --git a/LLama.Examples/Examples/SemanticKernelChat.cs b/LLama.Examples/Examples/SemanticKernelChat.cs index 39870f1b..a9d5be54 100644 --- a/LLama.Examples/Examples/SemanticKernelChat.cs +++ b/LLama.Examples/Examples/SemanticKernelChat.cs @@ -29,7 +29,7 @@ namespace LLama.Examples.Examples await MessageOutputAsync(chatHistory); // First bot assistant message - string reply = await chatGPT.GenerateMessageAsync(chatHistory); + string reply = await chatGPT.GetChatMessageContentAsync(chatHistory); chatHistory.AddAssistantMessage(reply); await MessageOutputAsync(chatHistory); @@ -38,7 +38,7 @@ namespace LLama.Examples.Examples await MessageOutputAsync(chatHistory); // Second bot assistant message - reply = await chatGPT.GenerateMessageAsync(chatHistory); + reply = await chatGPT.GetChatMessageContentAsync(chatHistory); chatHistory.AddAssistantMessage(reply); await MessageOutputAsync(chatHistory); } diff --git a/LLama.Examples/Examples/SemanticKernelPrompt.cs b/LLama.Examples/Examples/SemanticKernelPrompt.cs index c4974d67..21cb55de 100644 --- 
a/LLama.Examples/Examples/SemanticKernelPrompt.cs +++ b/LLama.Examples/Examples/SemanticKernelPrompt.cs @@ -2,8 +2,9 @@ using LLama.Common; using LLamaSharp.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.AI.TextCompletion; using LLamaSharp.SemanticKernel.TextCompletion; +using Microsoft.SemanticKernel.AI.TextGeneration; +using Microsoft.Extensions.DependencyInjection; namespace LLama.Examples.Examples { @@ -21,7 +22,7 @@ namespace LLama.Examples.Examples var ex = new StatelessExecutor(model, parameters); var builder = new KernelBuilder(); - builder.WithAIService("local-llama", new LLamaSharpTextCompletion(ex), true); + builder.Services.AddKeyedSingleton("local-llama", new LLamaSharpTextCompletion(ex)); var kernel = builder.Build(); @@ -29,8 +30,8 @@ namespace LLama.Examples.Examples One line TLDR with the fewest words."; - ChatRequestSettings settings = new() {MaxTokens = 100}; - var summarize = kernel.CreateSemanticFunction(prompt, requestSettings: settings); + ChatRequestSettings settings = new() { MaxTokens = 100 }; + var summarize = kernel.CreateFunctionFromPrompt(prompt, settings); string text1 = @" 1st Law of Thermodynamics - Energy cannot be created or destroyed. @@ -42,10 +43,9 @@ One line TLDR with the fewest words."; 2. The acceleration of an object depends on the mass of the object and the amount of force applied. 3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite on the first."; - Console.WriteLine((await kernel.RunAsync(text1, summarize)).GetValue()); + Console.WriteLine((await kernel.InvokeAsync(summarize,new KernelArguments(text1))).GetValue()); - Console.WriteLine((await kernel.RunAsync(text2, summarize)).GetValue()); + Console.WriteLine((await kernel.InvokeAsync(summarize, new KernelArguments(text2))).GetValue()); } } } - \ No newline at end of file diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj index d158f05f..9e4f17ab 100644 --- a/LLama.Examples/LLama.Examples.csproj +++ b/LLama.Examples/LLama.Examples.csproj @@ -1,4 +1,4 @@ - + Exe @@ -9,28 +9,14 @@ true true - - - - 1701;1702;8604 - - - - 1701;1702;8604 - - - - 1701;1702;8604 - - - - 1701;1702;8604 + 12 + 1701;1702;8604;SKEXP0001;SKEXP0052;SKEXP0003 - - + + diff --git a/LLama.KernelMemory/BuilderExtensions.cs b/LLama.KernelMemory/BuilderExtensions.cs index 5f476ce8..7afac4bb 100644 --- a/LLama.KernelMemory/BuilderExtensions.cs +++ b/LLama.KernelMemory/BuilderExtensions.cs @@ -17,19 +17,6 @@ namespace LLamaSharp.KernelMemory public static class BuilderExtensions { - private static IKernelMemoryBuilder WithCustomEmbeddingGeneration(this IKernelMemoryBuilder builder, ITextEmbeddingGeneration embeddingGeneration) - { - builder.AddSingleton(embeddingGeneration); - builder.AddIngestionEmbeddingGenerator(embeddingGeneration); - return builder; - } - - private static IKernelMemoryBuilder WithCustomTextGeneration(this IKernelMemoryBuilder builder, ITextGeneration textGeneration) - { - builder.AddSingleton(textGeneration); - return builder; - } - /// /// Adds LLamaSharpTextEmbeddingGeneration to the KernelMemoryBuilder. /// @@ -38,7 +25,9 @@ namespace LLamaSharp.KernelMemory /// The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration added. 
public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpConfig config) { - builder.WithCustomEmbeddingGeneration(new LLamaSharpTextEmbeddingGeneration(config)); + var generator = new LLamaSharpTextEmbeddingGenerator(config); + builder.AddSingleton(generator); + builder.AddIngestionEmbeddingGenerator(generator); return builder; } @@ -46,11 +35,12 @@ namespace LLamaSharp.KernelMemory /// Adds LLamaSharpTextEmbeddingGeneration to the KernelMemoryBuilder. /// /// The KernelMemoryBuilder instance. - /// The LLamaSharpTextEmbeddingGeneration instance. + /// The LLamaSharpTextEmbeddingGeneration instance. /// The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration added. - public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpTextEmbeddingGeneration textEmbeddingGeneration) + public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpTextEmbeddingGenerator textEmbeddingGenerator) { - builder.WithCustomEmbeddingGeneration(textEmbeddingGeneration); + builder.AddSingleton(textEmbeddingGenerator); + builder.AddIngestionEmbeddingGenerator(textEmbeddingGenerator); return builder; } @@ -62,7 +52,7 @@ namespace LLamaSharp.KernelMemory /// The KernelMemoryBuilder instance with LLamaSharpTextGeneration added. public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LLamaSharpConfig config) { - builder.WithCustomTextGeneration(new LlamaSharpTextGeneration(config)); + builder.AddSingleton(new LlamaSharpTextGenerator(config)); return builder; } @@ -70,11 +60,11 @@ namespace LLamaSharp.KernelMemory /// Adds LLamaSharpTextGeneration to the KernelMemoryBuilder. /// /// The KernelMemoryBuilder instance. - /// The LlamaSharpTextGeneration instance. + /// The LlamaSharpTextGeneration instance. /// The KernelMemoryBuilder instance with LLamaSharpTextGeneration added. 
- public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LlamaSharpTextGeneration textGeneration) + public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LlamaSharpTextGenerator textGenerator) { - builder.WithCustomTextGeneration(textGeneration); + builder.AddSingleton(textGenerator); return builder; } @@ -96,8 +86,8 @@ namespace LLamaSharp.KernelMemory var context = weights.CreateContext(parameters); var executor = new StatelessExecutor(weights, parameters); var embedder = new LLamaEmbedder(weights, parameters); - builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGeneration(embedder)); - builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGeneration(weights, context, executor, config?.DefaultInferenceParams)); + builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGenerator(embedder)); + builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGenerator(weights, context, executor, config?.DefaultInferenceParams)); return builder; } } diff --git a/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj b/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj index 78d4712b..bf3280a3 100644 --- a/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj +++ b/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj @@ -4,8 +4,6 @@ net6.0;net7.0 enable enable - - 0.7.1 0.8.0 Xbotter SciSharp STACK @@ -29,7 +27,7 @@ - + diff --git a/LLama.KernelMemory/LLamaSharpTextEmbeddingGeneration.cs b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs similarity index 72% rename from LLama.KernelMemory/LLamaSharpTextEmbeddingGeneration.cs rename to LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs index 4421ed8a..a00c5352 100644 --- a/LLama.KernelMemory/LLamaSharpTextEmbeddingGeneration.cs +++ b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs @@ -1,6 +1,8 @@ using LLama; using LLama.Abstractions; using LLama.Common; +using Microsoft.KernelMemory; +using Microsoft.KernelMemory.AI; using Microsoft.SemanticKernel.AI.Embeddings; using System; using System.Collections.Generic; @@ -13,22 +15,23 @@ namespace LLamaSharp.KernelMemory /// /// Provides text embedding generation for LLamaSharp. /// - public class LLamaSharpTextEmbeddingGeneration : ITextEmbeddingGeneration, IDisposable + public class LLamaSharpTextEmbeddingGenerator + : ITextEmbeddingGenerator, IDisposable { private readonly LLamaSharpConfig? _config; private readonly LLamaWeights? _weights; private readonly LLamaEmbedder _embedder; private bool _ownsEmbedder = false; private bool _ownsWeights = false; - private readonly Dictionary _attributes = new(); - public IReadOnlyDictionary Attributes => this._attributes; + /// + public int MaxTokens => (int?)_config?.ContextSize ?? 2048; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// The configuration for LLamaSharp. - public LLamaSharpTextEmbeddingGeneration(LLamaSharpConfig config) + public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config) { this._config = config; var @params = new ModelParams(_config.ModelPath); @@ -39,11 +42,11 @@ namespace LLamaSharp.KernelMemory } /// - /// Initializes a new instance of the class from reused weights. + /// Initializes a new instance of the class from reused weights. /// /// The configuration for LLamaSharp. /// A LLamaWeights object. 
- public LLamaSharpTextEmbeddingGeneration(LLamaSharpConfig config, LLamaWeights weights) + public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights weights) { this._config = config; var @params = new ModelParams(_config.ModelPath); @@ -53,10 +56,10 @@ namespace LLamaSharp.KernelMemory } /// - /// Initializes a new instance of the class from reused embedder. + /// Initializes a new instance of the class from reused embedder. /// /// A LLamaEmbedder object. - public LLamaSharpTextEmbeddingGeneration(LLamaEmbedder embedder) + public LLamaSharpTextEmbeddingGenerator(LLamaEmbedder embedder) { this._config = null; this._weights = null; @@ -89,5 +92,15 @@ namespace LLamaSharp.KernelMemory return Task.FromResult(results); } + + /// + public Task GenerateEmbeddingAsync(string text, CancellationToken cancellationToken = default) + { + var embeddings = _embedder.GetEmbeddings(text); + return Task.FromResult(new Embedding(embeddings)); + } + + /// + public int CountTokens(string text) => _embedder.Tokenize(text).Length; } } diff --git a/LLama.KernelMemory/LlamaSharpTextGeneration.cs b/LLama.KernelMemory/LlamaSharpTextGenerator.cs similarity index 86% rename from LLama.KernelMemory/LlamaSharpTextGeneration.cs rename to LLama.KernelMemory/LlamaSharpTextGenerator.cs index 663a77cf..7269152b 100644 --- a/LLama.KernelMemory/LlamaSharpTextGeneration.cs +++ b/LLama.KernelMemory/LlamaSharpTextGenerator.cs @@ -13,7 +13,7 @@ namespace LLamaSharp.KernelMemory /// /// Provides text generation for LLamaSharp. /// - public class LlamaSharpTextGeneration : ITextGeneration, IDisposable + public class LlamaSharpTextGenerator : ITextGenerator, IDisposable { private readonly LLamaWeights _weights; private readonly StatelessExecutor _executor; @@ -22,11 +22,13 @@ namespace LLamaSharp.KernelMemory private bool _ownsContext = false; private bool _ownsWeights = false; + public int MaxTokenTotal { get; } + /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// The configuration for LLamaSharp. - public LlamaSharpTextGeneration(LLamaSharpConfig config) + public LlamaSharpTextGenerator(LLamaSharpConfig config) { var parameters = new ModelParams(config.ModelPath) { @@ -39,21 +41,23 @@ namespace LLamaSharp.KernelMemory _executor = new StatelessExecutor(_weights, parameters); _defaultInferenceParams = config?.DefaultInferenceParams; _ownsWeights = _ownsContext = true; + MaxTokenTotal = (int)parameters.ContextSize; } /// - /// Initializes a new instance of the class from reused weights, context and executor. + /// Initializes a new instance of the class from reused weights, context and executor. /// If executor is not specified, then a StatelessExecutor will be created with `context.Params`. So far only `StatelessExecutor` is expected. /// /// A LLamaWeights object. /// A LLamaContext object. /// An executor. Currently only StatelessExecutor is expected. - public LlamaSharpTextGeneration(LLamaWeights weights, LLamaContext context, StatelessExecutor? executor = null, InferenceParams? inferenceParams = null) + public LlamaSharpTextGenerator(LLamaWeights weights, LLamaContext context, StatelessExecutor? executor = null, InferenceParams? inferenceParams = null) { _weights = weights; _context = context; _executor = executor ?? 
new StatelessExecutor(_weights, _context.Params); _defaultInferenceParams = inferenceParams; + MaxTokenTotal = (int)_context.Params.ContextSize; } /// @@ -102,5 +106,8 @@ namespace LLamaSharp.KernelMemory }; } } + + /// + public int CountTokens(string text) => _context.Tokenize(text).Length; } } diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs index e04ee9e4..aab3240f 100644 --- a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs +++ b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs @@ -4,7 +4,7 @@ using System.Text.Json.Serialization; namespace LLamaSharp.SemanticKernel.ChatCompletion; -public class ChatRequestSettings : AIRequestSettings +public class ChatRequestSettings : PromptExecutionSettings { /// /// Temperature controls the randomness of the completion. @@ -68,7 +68,7 @@ public class ChatRequestSettings : AIRequestSettings /// Template configuration /// Default max tokens /// An instance of OpenAIRequestSettings - public static ChatRequestSettings FromRequestSettings(AIRequestSettings? requestSettings, int? defaultMaxTokens = null) + public static ChatRequestSettings FromRequestSettings(PromptExecutionSettings? requestSettings, int? defaultMaxTokens = null) { if (requestSettings is null) { diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs index 7e5425bb..9611a0cf 100644 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs +++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs @@ -1,8 +1,13 @@ using LLama; using LLama.Abstractions; +using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.AI; using Microsoft.SemanticKernel.AI.ChatCompletion; +using Microsoft.SemanticKernel.Services; +using System; +using System.IO; using System.Runtime.CompilerServices; +using System.Text; using static LLama.LLamaTransforms; namespace LLamaSharp.SemanticKernel.ChatCompletion; @@ -10,7 +15,7 @@ namespace LLamaSharp.SemanticKernel.ChatCompletion; /// /// LLamaSharp ChatCompletion /// -public sealed class LLamaSharpChatCompletion : IChatCompletion +public sealed class LLamaSharpChatCompletion : IChatCompletionService { private readonly StatelessExecutor _model; private ChatRequestSettings defaultRequestSettings; @@ -21,6 +26,8 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion public IReadOnlyDictionary Attributes => this._attributes; + IReadOnlyDictionary IAIService.Attributes => throw new NotImplementedException(); + static ChatRequestSettings GetDefaultSettings() { return new ChatRequestSettings @@ -45,7 +52,6 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion $"{LLama.Common.AuthorRole.System}:"}); } - /// public ChatHistory CreateNewChat(string? instructions = "") { var history = new ChatHistory(); @@ -59,30 +65,41 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion } /// - public Task> GetChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, CancellationToken cancellationToken = default) + public async Task> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) { - var settings = requestSettings != null - ? 
ChatRequestSettings.FromRequestSettings(requestSettings) - : defaultRequestSettings; - var prompt = historyTransform.HistoryToText(chat.ToLLamaSharpChatHistory()); + var settings = executionSettings != null + ? ChatRequestSettings.FromRequestSettings(executionSettings) + : defaultRequestSettings; + var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory()); var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken); - return Task.FromResult>(new List { new LLamaSharpChatResult(outputTransform.TransformAsync(result)) }.AsReadOnly()); + var output = outputTransform.TransformAsync(result); + + var sb = new StringBuilder(); + await foreach (var token in output) + { + sb.Append(token); + } + + return new List { new(AuthorRole.Assistant, sb.ToString()) }.AsReadOnly(); } /// -#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously. - public async IAsyncEnumerable GetStreamingChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) -#pragma warning restore CS1998 + public async IAsyncEnumerable GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - var settings = requestSettings != null - ? ChatRequestSettings.FromRequestSettings(requestSettings) - : defaultRequestSettings; - var prompt = historyTransform.HistoryToText(chat.ToLLamaSharpChatHistory()); - // This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable. + var settings = executionSettings != null + ? ChatRequestSettings.FromRequestSettings(executionSettings) + : defaultRequestSettings; + var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory()); + var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken); - yield return new LLamaSharpChatResult(outputTransform.TransformAsync(result)); + var output = outputTransform.TransformAsync(result); + + await foreach (var token in output) + { + yield return new StreamingChatMessageContent(AuthorRole.Assistant, token); + } } } diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs deleted file mode 100644 index 1069feda..00000000 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs +++ /dev/null @@ -1,14 +0,0 @@ -using Microsoft.SemanticKernel.AI.ChatCompletion; - -namespace LLamaSharp.SemanticKernel.ChatCompletion; - -/// -/// LLamaSharp Chat Message -/// -public class LLamaSharpChatMessage : ChatMessage -{ - /// - public LLamaSharpChatMessage(AuthorRole role, string content) : base(role, content) - { - } -} diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs deleted file mode 100644 index 07c3ac17..00000000 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs +++ /dev/null @@ -1,44 +0,0 @@ -using Microsoft.SemanticKernel.AI.ChatCompletion; -using Microsoft.SemanticKernel.Orchestration; -using System.Runtime.CompilerServices; -using System.Text; - -namespace LLamaSharp.SemanticKernel.ChatCompletion; - -internal sealed class LLamaSharpChatResult : IChatResult, IChatStreamingResult -{ - private readonly ModelResult _modelResult; - private readonly IAsyncEnumerable _stream; 
- - /// - /// - /// - /// - public LLamaSharpChatResult(IAsyncEnumerable stream) - { - _stream = stream; - this._modelResult = new ModelResult(stream); - } - - public ModelResult ModelResult => this._modelResult; - - /// - public async Task GetChatMessageAsync(CancellationToken cancellationToken = default) - { - var sb = new StringBuilder(); - await foreach (var token in _stream) - { - sb.Append(token); - } - return await Task.FromResult(new LLamaSharpChatMessage(AuthorRole.Assistant, sb.ToString())).ConfigureAwait(false); - } - - /// - public async IAsyncEnumerable GetStreamingChatMessageAsync([EnumeratorCancellation] CancellationToken cancellationToken = default) - { - await foreach (var token in _stream) - { - yield return new LLamaSharpChatMessage(AuthorRole.Assistant, token); - } - } -} diff --git a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj index f787ac50..501ca9d2 100644 --- a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj +++ b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj @@ -30,10 +30,11 @@ AnyCPU;x64;Arm64 LLamaSharp.semantic-kernel Debug;Release;GPU + SKEXP0001,SKEXP0052 - + diff --git a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs index 059a9ff3..e7a6151b 100644 --- a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs +++ b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs @@ -1,12 +1,15 @@ using LLama.Abstractions; using LLamaSharp.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.AI; -using Microsoft.SemanticKernel.AI.TextCompletion; +using Microsoft.SemanticKernel.AI.TextGeneration; +using Microsoft.SemanticKernel.Services; using System.Runtime.CompilerServices; +using System.Text; namespace LLamaSharp.SemanticKernel.TextCompletion; -public sealed class LLamaSharpTextCompletion : ITextCompletion +public sealed class LLamaSharpTextCompletion : ITextGenerationService { public ILLamaExecutor executor; @@ -14,24 +17,34 @@ public sealed class LLamaSharpTextCompletion : ITextCompletion public IReadOnlyDictionary Attributes => this._attributes; + IReadOnlyDictionary IAIService.Attributes => throw new NotImplementedException(); + public LLamaSharpTextCompletion(ILLamaExecutor executor) { this.executor = executor; } - public async Task> GetCompletionsAsync(string text, AIRequestSettings? requestSettings, CancellationToken cancellationToken = default) + /// + public async Task> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) { - var settings = ChatRequestSettings.FromRequestSettings(requestSettings); - var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken); - return await Task.FromResult(new List { new LLamaTextResult(result) }.AsReadOnly()).ConfigureAwait(false); + var settings = ChatRequestSettings.FromRequestSettings(executionSettings); + var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken); + var sb = new StringBuilder(); + await foreach (var token in result) + { + sb.Append(token); + } + return new List { new(sb.ToString()) }; } -#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously. - public async IAsyncEnumerable GetStreamingCompletionsAsync(string text, AIRequestSettings? 
requestSettings,[EnumeratorCancellation] CancellationToken cancellationToken = default) -#pragma warning restore CS1998 + /// + public async IAsyncEnumerable GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - var settings = ChatRequestSettings.FromRequestSettings(requestSettings); - var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken); - yield return new LLamaTextResult(result); + var settings = ChatRequestSettings.FromRequestSettings(executionSettings); + var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken); + await foreach (var token in result) + { + yield return new StreamingTextContent(token); + } } } diff --git a/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs b/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs deleted file mode 100644 index b66013ba..00000000 --- a/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs +++ /dev/null @@ -1,37 +0,0 @@ -using Microsoft.SemanticKernel.AI.TextCompletion; -using Microsoft.SemanticKernel.Orchestration; -using System.Runtime.CompilerServices; -using System.Text; - -namespace LLamaSharp.SemanticKernel.TextCompletion; - -internal sealed class LLamaTextResult : ITextResult, ITextStreamingResult -{ - private readonly IAsyncEnumerable _text; - - public LLamaTextResult(IAsyncEnumerable text) - { - _text = text; - ModelResult = new(text); - } - - public ModelResult ModelResult { get; } - - public async Task GetCompletionAsync(CancellationToken cancellationToken = default) - { - var sb = new StringBuilder(); - await foreach (var token in _text) - { - sb.Append(token); - } - return await Task.FromResult(sb.ToString()).ConfigureAwait(false); - } - - public async IAsyncEnumerable GetCompletionStreamingAsync([EnumeratorCancellation] CancellationToken cancellationToken = default) - { - await foreach (string word in _text) - { - yield return word; - } - } -} diff --git a/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs index 155c5406..83c97f02 100644 --- a/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs +++ b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs @@ -1,4 +1,5 @@ using LLama; +using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.AI.Embeddings; namespace LLamaSharp.SemanticKernel.TextEmbedding; @@ -7,9 +8,9 @@ public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration { private LLamaEmbedder _embedder; - private readonly Dictionary _attributes = new(); + private readonly Dictionary _attributes = new(); - public IReadOnlyDictionary Attributes => this._attributes; + public IReadOnlyDictionary Attributes => this._attributes; public LLamaSharpEmbeddingGeneration(LLamaEmbedder embedder) { @@ -17,7 +18,7 @@ public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration } /// - public async Task>> GenerateEmbeddingsAsync(IList data, CancellationToken cancellationToken = default) + public async Task>> GenerateEmbeddingsAsync(IList data, Kernel? 
kernel = null, CancellationToken cancellationToken = default) { var embeddings = data.Select(text => new ReadOnlyMemory(_embedder.GetEmbeddings(text))).ToList(); return await Task.FromResult(embeddings); diff --git a/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs b/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs index 99881b57..f552114d 100644 --- a/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs +++ b/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs @@ -75,7 +75,7 @@ namespace LLama.Unittest.SemanticKernel public void ChatRequestSettings_FromAIRequestSettings() { // Arrange - var originalRequestSettings = new AIRequestSettings() + var originalRequestSettings = new PromptExecutionSettings() { ServiceId = "test", }; @@ -92,7 +92,7 @@ namespace LLama.Unittest.SemanticKernel public void ChatRequestSettings_FromAIRequestSettingsWithExtraPropertiesInSnakeCase() { // Arrange - var originalRequestSettings = new AIRequestSettings() + var originalRequestSettings = new PromptExecutionSettings() { ServiceId = "test", ExtensionData = new Dictionary @@ -131,7 +131,7 @@ namespace LLama.Unittest.SemanticKernel public void ChatRequestSettings_FromAIRequestSettingsWithExtraPropertiesInPascalCase() { // Arrange - var originalRequestSettings = new AIRequestSettings() + var originalRequestSettings = new PromptExecutionSettings() { ServiceId = "test", ExtensionData = new Dictionary diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs index ee23cd39..208dac1e 100644 --- a/LLama/LLamaEmbedder.cs +++ b/LLama/LLamaEmbedder.cs @@ -117,5 +117,13 @@ namespace LLama { _ctx.Dispose(); } + + /// + /// Tokenize a string. + /// + public int[] Tokenize(string text, bool addBos = true, bool special = false) + { + return _ctx.Tokenize(text, addBos, special); + } } } From 213b4be723e2f1f7baf824764d9dc869f2c3c7fe Mon Sep 17 00:00:00 2001 From: xbotter Date: Thu, 14 Dec 2023 09:47:32 +0800 Subject: [PATCH 2/4] bump sk-1.0.0-rc4 --- LLama.Examples/Examples/SemanticKernelChat.cs | 9 +++++---- LLama.Examples/Examples/SemanticKernelPrompt.cs | 8 ++++---- LLama.Examples/LLama.Examples.csproj | 2 +- .../ChatCompletion/ChatRequestSettings.cs | 2 +- .../ChatCompletion/ChatRequestSettingsConverter.cs | 9 ++++----- .../ChatCompletion/LLamaSharpChatCompletion.cs | 9 +++------ LLama.SemanticKernel/ExtensionMethods.cs | 3 +-- LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj | 2 +- .../TextCompletion/LLamaSharpTextCompletion.cs | 9 +++------ .../TextEmbedding/LLamaSharpEmbeddingGeneration.cs | 4 ++-- .../SemanticKernel/ChatRequestSettingsTests.cs | 10 +++++----- 11 files changed, 30 insertions(+), 37 deletions(-) diff --git a/LLama.Examples/Examples/SemanticKernelChat.cs b/LLama.Examples/Examples/SemanticKernelChat.cs index a9d5be54..86d7a1d5 100644 --- a/LLama.Examples/Examples/SemanticKernelChat.cs +++ b/LLama.Examples/Examples/SemanticKernelChat.cs @@ -2,6 +2,7 @@ using LLama.Common; using Microsoft.SemanticKernel.AI.ChatCompletion; using LLamaSharp.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.ChatCompletion; namespace LLama.Examples.Examples { @@ -29,8 +30,8 @@ namespace LLama.Examples.Examples await MessageOutputAsync(chatHistory); // First bot assistant message - string reply = await chatGPT.GetChatMessageContentAsync(chatHistory); - chatHistory.AddAssistantMessage(reply); + var reply = await chatGPT.GetChatMessageContentAsync(chatHistory); + chatHistory.AddAssistantMessage(reply.Content); await MessageOutputAsync(chatHistory); // Second user message @@ -39,14 
+40,14 @@ namespace LLama.Examples.Examples // Second bot assistant message reply = await chatGPT.GetChatMessageContentAsync(chatHistory); - chatHistory.AddAssistantMessage(reply); + chatHistory.AddAssistantMessage(reply.Content); await MessageOutputAsync(chatHistory); } /// /// Outputs the last message of the chat history /// - private static Task MessageOutputAsync(Microsoft.SemanticKernel.AI.ChatCompletion.ChatHistory chatHistory) + private static Task MessageOutputAsync(Microsoft.SemanticKernel.ChatCompletion.ChatHistory chatHistory) { var message = chatHistory.Last(); diff --git a/LLama.Examples/Examples/SemanticKernelPrompt.cs b/LLama.Examples/Examples/SemanticKernelPrompt.cs index 21cb55de..4c4157a3 100644 --- a/LLama.Examples/Examples/SemanticKernelPrompt.cs +++ b/LLama.Examples/Examples/SemanticKernelPrompt.cs @@ -3,7 +3,7 @@ using LLama.Common; using LLamaSharp.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel; using LLamaSharp.SemanticKernel.TextCompletion; -using Microsoft.SemanticKernel.AI.TextGeneration; +using Microsoft.SemanticKernel.TextGeneration; using Microsoft.Extensions.DependencyInjection; namespace LLama.Examples.Examples @@ -21,7 +21,7 @@ namespace LLama.Examples.Examples using var model = LLamaWeights.LoadFromFile(parameters); var ex = new StatelessExecutor(model, parameters); - var builder = new KernelBuilder(); + var builder = Kernel.CreateBuilder(); builder.Services.AddKeyedSingleton("local-llama", new LLamaSharpTextCompletion(ex)); var kernel = builder.Build(); @@ -43,9 +43,9 @@ One line TLDR with the fewest words."; 2. The acceleration of an object depends on the mass of the object and the amount of force applied. 3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite on the first."; - Console.WriteLine((await kernel.InvokeAsync(summarize,new KernelArguments(text1))).GetValue()); + Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text1 })).GetValue()); - Console.WriteLine((await kernel.InvokeAsync(summarize, new KernelArguments(text2))).GetValue()); + Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text2 })).GetValue()); } } } diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj index 9e4f17ab..2266bdcf 100644 --- a/LLama.Examples/LLama.Examples.csproj +++ b/LLama.Examples/LLama.Examples.csproj @@ -16,7 +16,7 @@ - + diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs index aab3240f..ac22e1fc 100644 --- a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs +++ b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs @@ -1,4 +1,4 @@ -using Microsoft.SemanticKernel.AI; +using Microsoft.SemanticKernel; using System.Text.Json; using System.Text.Json.Serialization; diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs index f0d3a430..e320ea3f 100644 --- a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs +++ b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs @@ -31,6 +31,10 @@ public class ChatRequestSettingsConverter : JsonConverter switch (propertyName) { + case "MODELID": + case "MODEL_ID": + requestSettings.ModelId = reader.GetString(); + break; case "TEMPERATURE": requestSettings.Temperature = reader.GetDouble(); break; @@ -62,10 +66,6 @@ public class ChatRequestSettingsConverter : 
JsonConverter case "TOKEN_SELECTION_BIASES": requestSettings.TokenSelectionBiases = JsonSerializer.Deserialize>(ref reader, options) ?? new Dictionary(); break; - case "SERVICEID": - case "SERVICE_ID": - requestSettings.ServiceId = reader.GetString(); - break; default: reader.Skip(); break; @@ -98,7 +98,6 @@ public class ChatRequestSettingsConverter : JsonConverter writer.WriteNumber("results_per_prompt", value.ResultsPerPrompt); writer.WritePropertyName("token_selection_biases"); JsonSerializer.Serialize(writer, value.TokenSelectionBiases, options); - writer.WriteString("service_id", value.ServiceId); writer.WriteEndObject(); } diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs index 9611a0cf..b1c0d347 100644 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs +++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs @@ -1,8 +1,7 @@ using LLama; using LLama.Abstractions; using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.AI; -using Microsoft.SemanticKernel.AI.ChatCompletion; +using Microsoft.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel.Services; using System; using System.IO; @@ -22,11 +21,9 @@ public sealed class LLamaSharpChatCompletion : IChatCompletionService private readonly IHistoryTransform historyTransform; private readonly ITextStreamTransform outputTransform; - private readonly Dictionary _attributes = new(); + private readonly Dictionary _attributes = new(); - public IReadOnlyDictionary Attributes => this._attributes; - - IReadOnlyDictionary IAIService.Attributes => throw new NotImplementedException(); + public IReadOnlyDictionary Attributes => this._attributes; static ChatRequestSettings GetDefaultSettings() { diff --git a/LLama.SemanticKernel/ExtensionMethods.cs b/LLama.SemanticKernel/ExtensionMethods.cs index 6f39e373..85f9064c 100644 --- a/LLama.SemanticKernel/ExtensionMethods.cs +++ b/LLama.SemanticKernel/ExtensionMethods.cs @@ -1,6 +1,5 @@ using LLamaSharp.SemanticKernel.ChatCompletion; -using Microsoft.SemanticKernel.AI.ChatCompletion; - +using Microsoft.SemanticKernel.ChatCompletion; namespace LLamaSharp.SemanticKernel; public static class ExtensionMethods diff --git a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj index 501ca9d2..8a39de53 100644 --- a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj +++ b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj @@ -34,7 +34,7 @@ - + diff --git a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs index e7a6151b..08ec33e1 100644 --- a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs +++ b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs @@ -1,9 +1,8 @@ using LLama.Abstractions; using LLamaSharp.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.AI; -using Microsoft.SemanticKernel.AI.TextGeneration; using Microsoft.SemanticKernel.Services; +using Microsoft.SemanticKernel.TextGeneration; using System.Runtime.CompilerServices; using System.Text; @@ -13,11 +12,9 @@ public sealed class LLamaSharpTextCompletion : ITextGenerationService { public ILLamaExecutor executor; - private readonly Dictionary _attributes = new(); + private readonly Dictionary _attributes = new(); - public IReadOnlyDictionary Attributes => this._attributes; - - IReadOnlyDictionary 
IAIService.Attributes => throw new NotImplementedException(); + public IReadOnlyDictionary Attributes => this._attributes; public LLamaSharpTextCompletion(ILLamaExecutor executor) { diff --git a/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs index 83c97f02..73ceb0f2 100644 --- a/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs +++ b/LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs @@ -1,10 +1,10 @@ using LLama; using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.AI.Embeddings; +using Microsoft.SemanticKernel.Embeddings; namespace LLamaSharp.SemanticKernel.TextEmbedding; -public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration +public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGenerationService { private LLamaEmbedder _embedder; diff --git a/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs b/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs index f552114d..ef5d9670 100644 --- a/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs +++ b/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs @@ -1,5 +1,5 @@ using LLamaSharp.SemanticKernel.ChatCompletion; -using Microsoft.SemanticKernel.AI; +using Microsoft.SemanticKernel; namespace LLama.Unittest.SemanticKernel { @@ -77,7 +77,7 @@ namespace LLama.Unittest.SemanticKernel // Arrange var originalRequestSettings = new PromptExecutionSettings() { - ServiceId = "test", + ModelId = "test", }; // Act @@ -85,7 +85,7 @@ namespace LLama.Unittest.SemanticKernel // Assert Assert.NotNull(requestSettings); - Assert.Equal(originalRequestSettings.ServiceId, requestSettings.ServiceId); + Assert.Equal(originalRequestSettings.ModelId, requestSettings.ModelId); } [Fact] @@ -94,7 +94,7 @@ namespace LLama.Unittest.SemanticKernel // Arrange var originalRequestSettings = new PromptExecutionSettings() { - ServiceId = "test", + ModelId = "test", ExtensionData = new Dictionary { { "frequency_penalty", 0.5 }, @@ -133,7 +133,7 @@ namespace LLama.Unittest.SemanticKernel // Arrange var originalRequestSettings = new PromptExecutionSettings() { - ServiceId = "test", + ModelId = "test", ExtensionData = new Dictionary { { "FrequencyPenalty", 0.5 }, From 40ac944fb5b9abc3a3f59ea30d8c724ace8270e3 Mon Sep 17 00:00:00 2001 From: xbotter Date: Tue, 19 Dec 2023 08:42:01 +0800 Subject: [PATCH 3/4] Bump sk to 1.0.1 --- LLama.Examples/Examples/SemanticKernelChat.cs | 2 +- LLama.Examples/Examples/SemanticKernelPrompt.cs | 2 +- LLama.Examples/LLama.Examples.csproj | 3 ++- LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/LLama.Examples/Examples/SemanticKernelChat.cs b/LLama.Examples/Examples/SemanticKernelChat.cs index 86d7a1d5..52324eed 100644 --- a/LLama.Examples/Examples/SemanticKernelChat.cs +++ b/LLama.Examples/Examples/SemanticKernelChat.cs @@ -10,7 +10,7 @@ namespace LLama.Examples.Examples { public static async Task Run() { - Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md"); + Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs"); Console.Write("Please input your model path: "); var modelPath = Console.ReadLine(); diff --git a/LLama.Examples/Examples/SemanticKernelPrompt.cs b/LLama.Examples/Examples/SemanticKernelPrompt.cs index 4c4157a3..40838e8b 100644 --- 
a/LLama.Examples/Examples/SemanticKernelPrompt.cs +++ b/LLama.Examples/Examples/SemanticKernelPrompt.cs @@ -12,7 +12,7 @@ namespace LLama.Examples.Examples { public static async Task Run() { - Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs"); + Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md"); Console.Write("Please input your model path: "); var modelPath = Console.ReadLine(); diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj index 94704e01..b0d7740e 100644 --- a/LLama.Examples/LLama.Examples.csproj +++ b/LLama.Examples/LLama.Examples.csproj @@ -16,7 +16,8 @@ - + + diff --git a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj index 8a39de53..2f365924 100644 --- a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj +++ b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj @@ -34,7 +34,7 @@ - + From 211ce12bf57e315b6e8987e3736c6b0e722a15b5 Mon Sep 17 00:00:00 2001 From: xbotter Date: Thu, 21 Dec 2023 10:28:37 +0800 Subject: [PATCH 4/4] LLamaEmbedder exposes the Context --- .../LLamaSharpTextEmbeddingGenerator.cs | 2 +- LLama/LLamaEmbedder.cs | 32 ++----------------- 2 files changed, 3 insertions(+), 31 deletions(-) diff --git a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs index a00c5352..8148adc8 100644 --- a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs +++ b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs @@ -101,6 +101,6 @@ namespace LLamaSharp.KernelMemory } /// - public int CountTokens(string text) => _embedder.Tokenize(text).Length; + public int CountTokens(string text) => _embedder.Context.Tokenize(text).Length; } } diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs index 208dac1e..ab56280c 100644 --- a/LLama/LLamaEmbedder.cs +++ b/LLama/LLamaEmbedder.cs @@ -20,30 +20,9 @@ namespace LLama public int EmbeddingSize => _ctx.EmbeddingSize; /// - /// Create a new embedder (loading temporary weights) + /// LLama Context /// - /// - /// - [Obsolete("Preload LLamaWeights and use the constructor which accepts them")] - public LLamaEmbedder(ILLamaParams allParams, ILogger? logger = null) - : this(allParams, allParams, logger) - { - } - - /// - /// Create a new embedder (loading temporary weights) - /// - /// - /// - /// - [Obsolete("Preload LLamaWeights and use the constructor which accepts them")] - public LLamaEmbedder(IModelParams modelParams, IContextParams contextParams, ILogger? logger = null) - { - using var weights = LLamaWeights.LoadFromFile(modelParams); - - contextParams.EmbeddingMode = true; - _ctx = weights.CreateContext(contextParams, logger); - } + public LLamaContext Context => this._ctx; /// /// Create a new embedder, using the given LLamaWeights @@ -118,12 +97,5 @@ namespace LLama _ctx.Dispose(); } - /// - /// Tokenize a string. - /// - public int[] Tokenize(string text, bool addBos = true, bool special = false) - { - return _ctx.Tokenize(text, addBos, special); - } } }
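
---
Reviewer note (not part of the patches): below is a minimal end-to-end sketch of how the post-patch API surface is consumed, mirroring the updated SemanticKernelPrompt.cs example above. It assumes Semantic Kernel 1.0.1 as pinned by patch 3; the model path, service key, and prompt text are placeholders, not values taken from the patches.

    // A minimal consumer sketch, assuming the post-patch packages
    // (Microsoft.SemanticKernel 1.0.1, LLamaSharp.SemanticKernel as updated above).
    using LLama;
    using LLama.Common;
    using LLamaSharp.SemanticKernel.TextCompletion;
    using Microsoft.Extensions.DependencyInjection;
    using Microsoft.SemanticKernel;
    using Microsoft.SemanticKernel.TextGeneration;

    // Load weights once and reuse them through a stateless executor.
    var parameters = new ModelParams("path/to/model.gguf"); // placeholder path
    using var model = LLamaWeights.LoadFromFile(parameters);
    var executor = new StatelessExecutor(model, parameters);

    // SK 1.0 drops builder.WithAIService<T>(...) in favor of keyed DI
    // registrations on Kernel.CreateBuilder(), as patches 1 and 2 show.
    var builder = Kernel.CreateBuilder();
    builder.Services.AddKeyedSingleton<ITextGenerationService>(
        "local-llama", new LLamaSharpTextCompletion(executor));
    var kernel = builder.Build();

    // CreateSemanticFunction/RunAsync are replaced by
    // CreateFunctionFromPrompt/InvokeAsync with KernelArguments.
    var summarize = kernel.CreateFunctionFromPrompt(
        "{{$input}}\n\nOne line TLDR with the fewest words.");
    var result = await kernel.InvokeAsync(
        summarize, new() { ["input"] = "Energy cannot be created or destroyed." });
    Console.WriteLine(result.GetValue<string>());

The same keyed-singleton pattern applies to LLamaSharpChatCompletion (registered as IChatCompletionService), whose GetChatMessageContentAsync now returns a ChatMessageContent whose .Content string is appended to the history, as patch 2 does in SemanticKernelChat.cs.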