bump sk to 1.0.1 & km to 0.18 (tag: 0.9.1)
@@ -16,6 +16,11 @@ namespace LLama.Examples.Examples
             Console.WriteLine("Example from: https://github.com/microsoft/kernel-memory/blob/main/examples/101-using-core-nuget/Program.cs");
             Console.Write("Please input your model path: ");
             var modelPath = Console.ReadLine();
+            var searchClientConfig = new SearchClientConfig
+            {
+                MaxMatchesCount = 1,
+                AnswerTokens = 100,
+            };
             var memory = new KernelMemoryBuilder()
                 .WithLLamaSharpDefaults(new LLamaSharpConfig(modelPath)
                 {
@@ -24,6 +29,7 @@ namespace LLama.Examples.Examples
                         AntiPrompts = new List<string> { "\n\n" }
                     }
                 })
+                .WithSearchClientConfig(searchClientConfig)
                 .With(new TextPartitioningOptions
                 {
                     MaxTokensPerParagraph = 300,
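
The two hunks above wire a `SearchClientConfig` into the Kernel Memory example so retrieval and answer length are capped. A minimal end-to-end sketch of the same wiring, assuming KM 0.18's stock `Build<MemoryServerless>()`, `ImportTextAsync` and `AskAsync` entry points (model path, document text and question are placeholders, not from this commit):

```csharp
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory;

var searchClientConfig = new SearchClientConfig
{
    MaxMatchesCount = 1, // keep only the best-matching partition
    AnswerTokens = 100,  // cap the generated answer length
};

var memory = new KernelMemoryBuilder()
    .WithLLamaSharpDefaults(new LLamaSharpConfig(@"path/to/model.gguf"))
    .WithSearchClientConfig(searchClientConfig)
    .Build<MemoryServerless>();

await memory.ImportTextAsync("The sky is blue because of Rayleigh scattering.");
var answer = await memory.AskAsync("Why is the sky blue?");
Console.WriteLine(answer.Result);
```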
@@ -2,6 +2,7 @@
 using LLama.Common;
-using Microsoft.SemanticKernel.AI.ChatCompletion;
 using LLamaSharp.SemanticKernel.ChatCompletion;
+using Microsoft.SemanticKernel.ChatCompletion;

 namespace LLama.Examples.Examples
 {
@@ -9,7 +10,7 @@ namespace LLama.Examples.Examples
     {
         public static async Task Run()
         {
-            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
+            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs");
             Console.Write("Please input your model path: ");
             var modelPath = Console.ReadLine();
@@ -29,8 +30,8 @@ namespace LLama.Examples.Examples
             await MessageOutputAsync(chatHistory);

             // First bot assistant message
-            string reply = await chatGPT.GenerateMessageAsync(chatHistory);
-            chatHistory.AddAssistantMessage(reply);
+            var reply = await chatGPT.GetChatMessageContentAsync(chatHistory);
+            chatHistory.AddAssistantMessage(reply.Content);

             await MessageOutputAsync(chatHistory);

             // Second user message
@@ -38,15 +39,15 @@ namespace LLama.Examples.Examples
             await MessageOutputAsync(chatHistory);

             // Second bot assistant message
-            reply = await chatGPT.GenerateMessageAsync(chatHistory);
-            chatHistory.AddAssistantMessage(reply);
+            reply = await chatGPT.GetChatMessageContentAsync(chatHistory);
+            chatHistory.AddAssistantMessage(reply.Content);

             await MessageOutputAsync(chatHistory);
         }

         /// <summary>
         /// Outputs the last message of the chat history
         /// </summary>
-        private static Task MessageOutputAsync(Microsoft.SemanticKernel.AI.ChatCompletion.ChatHistory chatHistory)
+        private static Task MessageOutputAsync(Microsoft.SemanticKernel.ChatCompletion.ChatHistory chatHistory)
         {
             var message = chatHistory.Last();
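
In SK 1.0.1 `IChatCompletion.GenerateMessageAsync` is gone; `IChatCompletionService.GetChatMessageContentAsync` returns a `ChatMessageContent`, so the text moves to `.Content`. A condensed sketch of the migrated flow, assuming the `LLamaSharpChatCompletion` constructor takes the `StatelessExecutor` as shown elsewhere in this diff (path and prompts are placeholders):

```csharp
using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.ChatCompletion;

var parameters = new ModelParams(@"path/to/model.gguf");
using var model = LLamaWeights.LoadFromFile(parameters);
var chatGPT = new LLamaSharpChatCompletion(new StatelessExecutor(model, parameters));

var chatHistory = chatGPT.CreateNewChat("You are a librarian, an expert on books.");
chatHistory.AddUserMessage("Hi, can you recommend a book?");

// 1.0.x: the reply is a ChatMessageContent, not a string.
var reply = await chatGPT.GetChatMessageContentAsync(chatHistory);
chatHistory.AddAssistantMessage(reply.Content);
```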
@@ -2,8 +2,9 @@
 using LLama.Common;
 using LLamaSharp.SemanticKernel.ChatCompletion;
 using Microsoft.SemanticKernel;
-using Microsoft.SemanticKernel.AI.TextCompletion;
 using LLamaSharp.SemanticKernel.TextCompletion;
+using Microsoft.SemanticKernel.TextGeneration;
+using Microsoft.Extensions.DependencyInjection;

 namespace LLama.Examples.Examples
 {
@@ -11,7 +12,7 @@ namespace LLama.Examples.Examples
     {
         public static async Task Run()
         {
-            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs");
+            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
             Console.Write("Please input your model path: ");
             var modelPath = Console.ReadLine();
@@ -20,8 +21,8 @@ namespace LLama.Examples.Examples
             using var model = LLamaWeights.LoadFromFile(parameters);
             var ex = new StatelessExecutor(model, parameters);

-            var builder = new KernelBuilder();
-            builder.WithAIService<ITextCompletion>("local-llama", new LLamaSharpTextCompletion(ex), true);
+            var builder = Kernel.CreateBuilder();
+            builder.Services.AddKeyedSingleton<ITextGenerationService>("local-llama", new LLamaSharpTextCompletion(ex));
             var kernel = builder.Build();
@@ -29,8 +30,8 @@ namespace LLama.Examples.Examples
One line TLDR with the fewest words.";

-            ChatRequestSettings settings = new() {MaxTokens = 100};
-            var summarize = kernel.CreateSemanticFunction(prompt, requestSettings: settings);
+            ChatRequestSettings settings = new() { MaxTokens = 100 };
+            var summarize = kernel.CreateFunctionFromPrompt(prompt, settings);

             string text1 = @"
1st Law of Thermodynamics - Energy cannot be created or destroyed.
@@ -42,10 +43,9 @@ One line TLDR with the fewest words.";
2. The acceleration of an object depends on the mass of the object and the amount of force applied.
3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite on the first.";

-            Console.WriteLine((await kernel.RunAsync(text1, summarize)).GetValue<string>());
+            Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text1 })).GetValue<string>());
-            Console.WriteLine((await kernel.RunAsync(text2, summarize)).GetValue<string>());
+            Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text2 })).GetValue<string>());
         }
     }
 }
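
This example covers three SK 1.0 renames at once: `new KernelBuilder()` becomes `Kernel.CreateBuilder()` with services registered through the regular DI container, `CreateSemanticFunction` becomes `CreateFunctionFromPrompt`, and `kernel.RunAsync(input, fn)` becomes `kernel.InvokeAsync(fn, arguments)`. A compact sketch of the new shape (prompt text and input are placeholders):

```csharp
using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel.ChatCompletion;
using LLamaSharp.SemanticKernel.TextCompletion;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.TextGeneration;

var parameters = new ModelParams(@"path/to/model.gguf");
using var model = LLamaWeights.LoadFromFile(parameters);
var ex = new StatelessExecutor(model, parameters);

// Services are now registered through the standard DI container, keyed by service id.
var builder = Kernel.CreateBuilder();
builder.Services.AddKeyedSingleton<ITextGenerationService>("local-llama", new LLamaSharpTextCompletion(ex));
var kernel = builder.Build();

var summarize = kernel.CreateFunctionFromPrompt(
    "{{$input}}\n\nOne line TLDR with the fewest words.",
    new ChatRequestSettings { MaxTokens = 100 });

// The anonymous `new() { ["input"] = ... }` in the diff is a KernelArguments.
var result = await kernel.InvokeAsync(summarize, new KernelArguments { ["input"] = "Energy cannot be created or destroyed." });
Console.WriteLine(result.GetValue<string>());
```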
@@ -1,4 +1,4 @@
-<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">

   <Import Project="..\LLama\LLamaSharp.Runtime.targets" />

   <PropertyGroup>
     <OutputType>Exe</OutputType>
@@ -9,28 +9,15 @@
     <!-- Set IncludeBuiltInRuntimes to false to include your own runtime libraries and not link the defaults -->
     <IncludeBuiltInRuntimes>true</IncludeBuiltInRuntimes>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
-    <NoWarn>1701;1702;8604</NoWarn>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
-    <NoWarn>1701;1702;8604</NoWarn>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <NoWarn>1701;1702;8604</NoWarn>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <NoWarn>1701;1702;8604</NoWarn>
+    <LangVersion>12</LangVersion>
+    <NoWarn>1701;1702;8604;SKEXP0001;SKEXP0052;SKEXP0003</NoWarn>
   </PropertyGroup>

   <ItemGroup>
     <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="8.0.0" />
-    <PackageReference Include="Microsoft.KernelMemory.Core" Version="0.12.231123.1-preview" />
-    <PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta8" />
+    <PackageReference Include="Microsoft.KernelMemory.Core" Version="0.18.231209.1-preview" />
+    <PackageReference Include="Microsoft.SemanticKernel" Version="1.0.1" />
+    <PackageReference Include="Microsoft.SemanticKernel.Plugins.Memory" Version="1.0.1-alpha" />
     <PackageReference Include="Spectre.Console" Version="0.48.0" />
   </ItemGroup>
@@ -17,19 +17,6 @@ namespace LLamaSharp.KernelMemory
     public static class BuilderExtensions
     {
-        private static IKernelMemoryBuilder WithCustomEmbeddingGeneration(this IKernelMemoryBuilder builder, ITextEmbeddingGeneration embeddingGeneration)
-        {
-            builder.AddSingleton<ITextEmbeddingGeneration>(embeddingGeneration);
-            builder.AddIngestionEmbeddingGenerator(embeddingGeneration);
-            return builder;
-        }
-
-        private static IKernelMemoryBuilder WithCustomTextGeneration(this IKernelMemoryBuilder builder, ITextGeneration textGeneration)
-        {
-            builder.AddSingleton<ITextGeneration>(textGeneration);
-            return builder;
-        }
-
         /// <summary>
         /// Adds LLamaSharpTextEmbeddingGeneration to the KernelMemoryBuilder.
         /// </summary>
@@ -38,7 +25,9 @@ namespace LLamaSharp.KernelMemory
         /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration added.</returns>
         public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpConfig config)
         {
-            builder.WithCustomEmbeddingGeneration(new LLamaSharpTextEmbeddingGeneration(config));
+            var generator = new LLamaSharpTextEmbeddingGenerator(config);
+            builder.AddSingleton<ITextEmbeddingGenerator>(generator);
+            builder.AddIngestionEmbeddingGenerator(generator);
             return builder;
         }
@@ -46,11 +35,12 @@ namespace LLamaSharp.KernelMemory
         /// Adds LLamaSharpTextEmbeddingGeneration to the KernelMemoryBuilder.
         /// </summary>
         /// <param name="builder">The KernelMemoryBuilder instance.</param>
-        /// <param name="textEmbeddingGeneration">The LLamaSharpTextEmbeddingGeneration instance.</param>
+        /// <param name="textEmbeddingGenerator">The LLamaSharpTextEmbeddingGeneration instance.</param>
         /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration added.</returns>
-        public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpTextEmbeddingGeneration textEmbeddingGeneration)
+        public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpTextEmbeddingGenerator textEmbeddingGenerator)
         {
-            builder.WithCustomEmbeddingGeneration(textEmbeddingGeneration);
+            builder.AddSingleton<ITextEmbeddingGenerator>(textEmbeddingGenerator);
+            builder.AddIngestionEmbeddingGenerator(textEmbeddingGenerator);
             return builder;
         }
@@ -62,7 +52,7 @@ namespace LLamaSharp.KernelMemory
         /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextGeneration added.</returns>
         public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LLamaSharpConfig config)
         {
-            builder.WithCustomTextGeneration(new LlamaSharpTextGeneration(config));
+            builder.AddSingleton<ITextGenerator>(new LlamaSharpTextGenerator(config));
             return builder;
         }
@@ -70,11 +60,11 @@ namespace LLamaSharp.KernelMemory
         /// Adds LLamaSharpTextGeneration to the KernelMemoryBuilder.
         /// </summary>
         /// <param name="builder">The KernelMemoryBuilder instance.</param>
-        /// <param name="textGeneration">The LlamaSharpTextGeneration instance.</param>
+        /// <param name="textGenerator">The LlamaSharpTextGeneration instance.</param>
         /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextGeneration added.</returns>
-        public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LlamaSharpTextGeneration textGeneration)
+        public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LlamaSharpTextGenerator textGenerator)
         {
-            builder.WithCustomTextGeneration(textGeneration);
+            builder.AddSingleton<ITextGenerator>(textGenerator);
             return builder;
         }
@@ -96,8 +86,8 @@ namespace LLamaSharp.KernelMemory
             var context = weights.CreateContext(parameters);
             var executor = new StatelessExecutor(weights, parameters);
             var embedder = new LLamaEmbedder(weights, parameters);
-            builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGeneration(embedder));
-            builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGeneration(weights, context, executor, config?.DefaultInferenceParams));
+            builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGenerator(embedder));
+            builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGenerator(weights, context, executor, config?.DefaultInferenceParams));
             return builder;
         }
     }
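
With the private `WithCustom*` helpers gone, registration goes straight through KM's `AddSingleton`/`AddIngestionEmbeddingGenerator`. The `WithLLamaSharpDefaults` body above also shows the intended sharing pattern; a sketch of doing the same by hand, so one set of weights backs both embedding and generation (the model path is a placeholder and `Build<MemoryServerless>()` is a stock KM call, not part of this diff):

```csharp
using LLama;
using LLama.Common;
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory;

var config = new LLamaSharpConfig(@"path/to/model.gguf");
var parameters = new ModelParams(config.ModelPath);
var weights = LLamaWeights.LoadFromFile(parameters);
var context = weights.CreateContext(parameters);
var executor = new StatelessExecutor(weights, parameters);
var embedder = new LLamaEmbedder(weights, parameters);

// Both registrations reuse the single LLamaWeights instance loaded above.
var memory = new KernelMemoryBuilder()
    .WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGenerator(embedder))
    .WithLLamaSharpTextGeneration(new LlamaSharpTextGenerator(weights, context, executor, config.DefaultInferenceParams))
    .Build<MemoryServerless>();
```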
@@ -4,8 +4,6 @@
     <TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks>
     <ImplicitUsings>enable</ImplicitUsings>
     <Nullable>enable</Nullable>
-    <Version>0.7.1</Version>
+    <Version>0.8.0</Version>
     <Authors>Xbotter</Authors>
     <Company>SciSharp STACK</Company>
@@ -29,7 +27,7 @@
   </PropertyGroup>

   <ItemGroup>
-    <PackageReference Include="Microsoft.KernelMemory.Abstractions" Version="0.12.231123.1-preview" />
+    <PackageReference Include="Microsoft.KernelMemory.Abstractions" Version="0.18.231209.1-preview" />
   </ItemGroup>

   <ItemGroup>
@@ -1,6 +1,8 @@
 using LLama;
+using LLama.Abstractions;
 using LLama.Common;
+using Microsoft.KernelMemory;
+using Microsoft.KernelMemory.AI;
-using Microsoft.SemanticKernel.AI.Embeddings;
 using System;
 using System.Collections.Generic;
@@ -13,22 +15,23 @@ namespace LLamaSharp.KernelMemory
     /// <summary>
     /// Provides text embedding generation for LLamaSharp.
    /// </summary>
-    public class LLamaSharpTextEmbeddingGeneration : ITextEmbeddingGeneration, IDisposable
+    public class LLamaSharpTextEmbeddingGenerator
+        : ITextEmbeddingGenerator, IDisposable
     {
         private readonly LLamaSharpConfig? _config;
         private readonly LLamaWeights? _weights;
         private readonly LLamaEmbedder _embedder;
         private bool _ownsEmbedder = false;
         private bool _ownsWeights = false;

-        private readonly Dictionary<string, string> _attributes = new();
-        public IReadOnlyDictionary<string, string> Attributes => this._attributes;
+        /// <inheritdoc/>
+        public int MaxTokens => (int?)_config?.ContextSize ?? 2048;

         /// <summary>
-        /// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGeneration"/> class.
+        /// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGenerator"/> class.
         /// </summary>
         /// <param name="config">The configuration for LLamaSharp.</param>
-        public LLamaSharpTextEmbeddingGeneration(LLamaSharpConfig config)
+        public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
         {
             this._config = config;
             var @params = new ModelParams(_config.ModelPath);
@@ -39,11 +42,11 @@ namespace LLamaSharp.KernelMemory
         }

         /// <summary>
-        /// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGeneration"/> class from reused weights.
+        /// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGenerator"/> class from reused weights.
         /// </summary>
         /// <param name="config">The configuration for LLamaSharp.</param>
         /// <param name="weights">A LLamaWeights object.</param>
-        public LLamaSharpTextEmbeddingGeneration(LLamaSharpConfig config, LLamaWeights weights)
+        public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights weights)
         {
             this._config = config;
             var @params = new ModelParams(_config.ModelPath);
@@ -53,10 +56,10 @@ namespace LLamaSharp.KernelMemory
         }

         /// <summary>
-        /// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGeneration"/> class from reused embedder.
+        /// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGenerator"/> class from reused embedder.
         /// </summary>
         /// <param name="embedder">A LLamaEmbedder object.</param>
-        public LLamaSharpTextEmbeddingGeneration(LLamaEmbedder embedder)
+        public LLamaSharpTextEmbeddingGenerator(LLamaEmbedder embedder)
         {
             this._config = null;
             this._weights = null;
@@ -89,5 +92,15 @@ namespace LLamaSharp.KernelMemory
             return Task.FromResult(results);
         }
+
+        /// <inheritdoc/>
+        public Task<Embedding> GenerateEmbeddingAsync(string text, CancellationToken cancellationToken = default)
+        {
+            var embeddings = _embedder.GetEmbeddings(text);
+            return Task.FromResult(new Embedding(embeddings));
+        }
+
+        /// <inheritdoc/>
+        public int CountTokens(string text) => _embedder.Context.Tokenize(text).Length;
     }
 }
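
The renamed class now implements KM's own `ITextEmbeddingGenerator`, which adds a single-text `GenerateEmbeddingAsync` (returning KM's `Embedding` wrapper) plus `CountTokens`/`MaxTokens` so KM can budget partition sizes. A usage sketch against the constructor overloads above (text and path are placeholders):

```csharp
using LLamaSharp.KernelMemory;

var generator = new LLamaSharpTextEmbeddingGenerator(new LLamaSharpConfig(@"path/to/model.gguf"));

// KM 0.18 asks generators for token counts so ingestion can size partitions.
int tokens = generator.CountTokens("The quick brown fox jumps over the lazy dog.");

// Embedding is KM's wrapper around the raw float vector.
var embedding = await generator.GenerateEmbeddingAsync("The quick brown fox jumps over the lazy dog.");
Console.WriteLine($"embedded {tokens} tokens");
```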
@@ -13,7 +13,7 @@ namespace LLamaSharp.KernelMemory
     /// <summary>
     /// Provides text generation for LLamaSharp.
     /// </summary>
-    public class LlamaSharpTextGeneration : ITextGeneration, IDisposable
+    public class LlamaSharpTextGenerator : ITextGenerator, IDisposable
     {
         private readonly LLamaWeights _weights;
         private readonly StatelessExecutor _executor;
@@ -22,11 +22,13 @@ namespace LLamaSharp.KernelMemory
         private bool _ownsContext = false;
         private bool _ownsWeights = false;

+        public int MaxTokenTotal { get; }
+
         /// <summary>
-        /// Initializes a new instance of the <see cref="LlamaSharpTextGeneration"/> class.
+        /// Initializes a new instance of the <see cref="LlamaSharpTextGenerator"/> class.
         /// </summary>
         /// <param name="config">The configuration for LLamaSharp.</param>
-        public LlamaSharpTextGeneration(LLamaSharpConfig config)
+        public LlamaSharpTextGenerator(LLamaSharpConfig config)
         {
             var parameters = new ModelParams(config.ModelPath)
             {
@@ -39,21 +41,23 @@ namespace LLamaSharp.KernelMemory
             _executor = new StatelessExecutor(_weights, parameters);
             _defaultInferenceParams = config?.DefaultInferenceParams;
             _ownsWeights = _ownsContext = true;
+            MaxTokenTotal = (int)parameters.ContextSize;
         }

         /// <summary>
-        /// Initializes a new instance of the <see cref="LlamaSharpTextGeneration"/> class from reused weights, context and executor.
+        /// Initializes a new instance of the <see cref="LlamaSharpTextGenerator"/> class from reused weights, context and executor.
         /// If executor is not specified, then a StatelessExecutor will be created with `context.Params`. So far only `StatelessExecutor` is expected.
         /// </summary>
         /// <param name="weights">A LLamaWeights object.</param>
         /// <param name="context">A LLamaContext object.</param>
         /// <param name="executor">An executor. Currently only StatelessExecutor is expected.</param>
-        public LlamaSharpTextGeneration(LLamaWeights weights, LLamaContext context, StatelessExecutor? executor = null, InferenceParams? inferenceParams = null)
+        public LlamaSharpTextGenerator(LLamaWeights weights, LLamaContext context, StatelessExecutor? executor = null, InferenceParams? inferenceParams = null)
         {
             _weights = weights;
             _context = context;
             _executor = executor ?? new StatelessExecutor(_weights, _context.Params);
             _defaultInferenceParams = inferenceParams;
+            MaxTokenTotal = (int)_context.Params.ContextSize;
         }

         /// <inheritdoc/>
@@ -102,5 +106,8 @@ namespace LLamaSharp.KernelMemory
                 };
             }
         }
+
+        /// <inheritdoc/>
+        public int CountTokens(string text) => _context.Tokenize(text).Length;
     }
 }
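
`ITextGenerator` (KM 0.18) streams tokens and exposes `CountTokens`/`MaxTokenTotal` for prompt budgeting. A consumption sketch, assuming KM's `GenerateTextAsync(prompt, options)` returns `IAsyncEnumerable<string>` and `TextGenerationOptions` carries `MaxTokens`/`Temperature` (prompt and path are placeholders):

```csharp
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory.AI;

ITextGenerator generator = new LlamaSharpTextGenerator(new LLamaSharpConfig(@"path/to/model.gguf"));

var options = new TextGenerationOptions { MaxTokens = 100, Temperature = 0 };
await foreach (var token in generator.GenerateTextAsync("Write a haiku about llamas.", options))
{
    Console.Write(token); // tokens stream as they are generated
}
```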
@@ -1,10 +1,10 @@
-using Microsoft.SemanticKernel.AI;
+using Microsoft.SemanticKernel;
 using System.Text.Json;
 using System.Text.Json.Serialization;

 namespace LLamaSharp.SemanticKernel.ChatCompletion;

-public class ChatRequestSettings : AIRequestSettings
+public class ChatRequestSettings : PromptExecutionSettings
 {
     /// <summary>
     /// Temperature controls the randomness of the completion.
@@ -68,7 +68,7 @@ public class ChatRequestSettings : AIRequestSettings
     /// <param name="requestSettings">Template configuration</param>
     /// <param name="defaultMaxTokens">Default max tokens</param>
     /// <returns>An instance of OpenAIRequestSettings</returns>
-    public static ChatRequestSettings FromRequestSettings(AIRequestSettings? requestSettings, int? defaultMaxTokens = null)
+    public static ChatRequestSettings FromRequestSettings(PromptExecutionSettings? requestSettings, int? defaultMaxTokens = null)
     {
         if (requestSettings is null)
         {
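
Since `ChatRequestSettings` now derives from `PromptExecutionSettings`, `FromRequestSettings` can coerce any settings object a caller hands to the service. A sketch of that path, assuming the conversion picks up `ExtensionData` keys the same way the unit-test hunks near the end of this diff exercise:

```csharp
using LLamaSharp.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel;

var generic = new PromptExecutionSettings
{
    ModelId = "local-llama",
    ExtensionData = new Dictionary<string, object> { { "max_tokens", 100 } },
};

var settings = ChatRequestSettings.FromRequestSettings(generic, defaultMaxTokens: 256);
Console.WriteLine(settings.MaxTokens); // expected: 100, taken from ExtensionData
```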
@@ -31,6 +31,10 @@ public class ChatRequestSettingsConverter : JsonConverter<ChatRequestSettings>
             switch (propertyName)
             {
+                case "MODELID":
+                case "MODEL_ID":
+                    requestSettings.ModelId = reader.GetString();
+                    break;
                 case "TEMPERATURE":
                     requestSettings.Temperature = reader.GetDouble();
                     break;
@@ -62,10 +66,6 @@ public class ChatRequestSettingsConverter : JsonConverter<ChatRequestSettings>
                 case "TOKEN_SELECTION_BIASES":
                     requestSettings.TokenSelectionBiases = JsonSerializer.Deserialize<IDictionary<int, int>>(ref reader, options) ?? new Dictionary<int, int>();
                     break;
-                case "SERVICEID":
-                case "SERVICE_ID":
-                    requestSettings.ServiceId = reader.GetString();
-                    break;
                 default:
                     reader.Skip();
                     break;
@@ -98,7 +98,6 @@ public class ChatRequestSettingsConverter : JsonConverter<ChatRequestSettings>
         writer.WriteNumber("results_per_prompt", value.ResultsPerPrompt);
         writer.WritePropertyName("token_selection_biases");
         JsonSerializer.Serialize(writer, value.TokenSelectionBiases, options);
-        writer.WriteString("service_id", value.ServiceId);
         writer.WriteEndObject();
     }
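
A round-trip sketch for the updated converter: `model_id` is now recognized, while an old `service_id` key falls into the `default` branch and is skipped (key names follow the switch cases above; the values are placeholders):

```csharp
using System.Text.Json;
using LLamaSharp.SemanticKernel.ChatCompletion;

var options = new JsonSerializerOptions { Converters = { new ChatRequestSettingsConverter() } };

var json = "{ \"model_id\": \"local-llama\", \"temperature\": 0.7 }";
var settings = JsonSerializer.Deserialize<ChatRequestSettings>(json, options);
Console.WriteLine($"{settings!.ModelId}, T={settings.Temperature}");
```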
@@ -1,8 +1,12 @@
 using LLama;
 using LLama.Abstractions;
-using Microsoft.SemanticKernel.AI;
-using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.ChatCompletion;
+using Microsoft.SemanticKernel.Services;
+using System;
+using System.IO;
 using System.Runtime.CompilerServices;
+using System.Text;
 using static LLama.LLamaTransforms;

 namespace LLamaSharp.SemanticKernel.ChatCompletion;
@@ -10,16 +14,16 @@ namespace LLamaSharp.SemanticKernel.ChatCompletion;
 /// <summary>
 /// LLamaSharp ChatCompletion
 /// </summary>
-public sealed class LLamaSharpChatCompletion : IChatCompletion
+public sealed class LLamaSharpChatCompletion : IChatCompletionService
 {
     private readonly StatelessExecutor _model;
     private ChatRequestSettings defaultRequestSettings;
     private readonly IHistoryTransform historyTransform;
     private readonly ITextStreamTransform outputTransform;

-    private readonly Dictionary<string, string> _attributes = new();
+    private readonly Dictionary<string, object?> _attributes = new();

-    public IReadOnlyDictionary<string, string> Attributes => this._attributes;
+    public IReadOnlyDictionary<string, object?> Attributes => this._attributes;

     static ChatRequestSettings GetDefaultSettings()
     {
@@ -45,7 +49,6 @@ public sealed class LLamaSharpChatCompletion : IChatCompletionService
                 $"{LLama.Common.AuthorRole.System}:"});
     }

-    /// <inheritdoc/>
     public ChatHistory CreateNewChat(string? instructions = "")
     {
         var history = new ChatHistory();
@@ -59,30 +62,41 @@ public sealed class LLamaSharpChatCompletion : IChatCompletionService
     }

     /// <inheritdoc/>
-    public Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
+    public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
     {
-        var settings = requestSettings != null
-            ? ChatRequestSettings.FromRequestSettings(requestSettings)
-            : defaultRequestSettings;
-        var prompt = historyTransform.HistoryToText(chat.ToLLamaSharpChatHistory());
+        var settings = executionSettings != null
+            ? ChatRequestSettings.FromRequestSettings(executionSettings)
+            : defaultRequestSettings;
+        var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory());

         var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken);

-        return Task.FromResult<IReadOnlyList<IChatResult>>(new List<IChatResult> { new LLamaSharpChatResult(outputTransform.TransformAsync(result)) }.AsReadOnly());
+        var output = outputTransform.TransformAsync(result);
+
+        var sb = new StringBuilder();
+        await foreach (var token in output)
+        {
+            sb.Append(token);
+        }
+
+        return new List<ChatMessageContent> { new(AuthorRole.Assistant, sb.ToString()) }.AsReadOnly();
     }

     /// <inheritdoc/>
-#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously.
-    public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
-#pragma warning restore CS1998
+    public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
     {
-        var settings = requestSettings != null
-            ? ChatRequestSettings.FromRequestSettings(requestSettings)
-            : defaultRequestSettings;
-        var prompt = historyTransform.HistoryToText(chat.ToLLamaSharpChatHistory());
-        // This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable.
+        var settings = executionSettings != null
+            ? ChatRequestSettings.FromRequestSettings(executionSettings)
+            : defaultRequestSettings;
+        var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory());

         var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken);
-        yield return new LLamaSharpChatResult(outputTransform.TransformAsync(result));
+        var output = outputTransform.TransformAsync(result);
+
+        await foreach (var token in output)
+        {
+            yield return new StreamingChatMessageContent(AuthorRole.Assistant, token);
+        }
     }
 }
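
The streaming side no longer wraps tokens in an `IChatStreamingResult`; each token arrives as a `StreamingChatMessageContent`. A consumer sketch against SK 1.0.1:

```csharp
using Microsoft.SemanticKernel.ChatCompletion;

static async Task StreamReplyAsync(IChatCompletionService chat, ChatHistory history)
{
    await foreach (var chunk in chat.GetStreamingChatMessageContentsAsync(history))
    {
        Console.Write(chunk.Content); // one token per chunk, per the implementation above
    }
}
```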
@@ -1,14 +0,0 @@
-using Microsoft.SemanticKernel.AI.ChatCompletion;
-
-namespace LLamaSharp.SemanticKernel.ChatCompletion;
-
-/// <summary>
-/// LLamaSharp Chat Message
-/// </summary>
-public class LLamaSharpChatMessage : ChatMessage
-{
-    /// <inheritdoc/>
-    public LLamaSharpChatMessage(AuthorRole role, string content) : base(role, content)
-    {
-    }
-}
@@ -1,44 +0,0 @@
-using Microsoft.SemanticKernel.AI.ChatCompletion;
-using Microsoft.SemanticKernel.Orchestration;
-using System.Runtime.CompilerServices;
-using System.Text;
-
-namespace LLamaSharp.SemanticKernel.ChatCompletion;
-
-internal sealed class LLamaSharpChatResult : IChatResult, IChatStreamingResult
-{
-    private readonly ModelResult _modelResult;
-    private readonly IAsyncEnumerable<string> _stream;
-
-    /// <summary>
-    ///
-    /// </summary>
-    /// <param name="stream"></param>
-    public LLamaSharpChatResult(IAsyncEnumerable<string> stream)
-    {
-        _stream = stream;
-        this._modelResult = new ModelResult(stream);
-    }
-
-    public ModelResult ModelResult => this._modelResult;
-
-    /// <inheritdoc/>
-    public async Task<ChatMessage> GetChatMessageAsync(CancellationToken cancellationToken = default)
-    {
-        var sb = new StringBuilder();
-        await foreach (var token in _stream)
-        {
-            sb.Append(token);
-        }
-        return await Task.FromResult(new LLamaSharpChatMessage(AuthorRole.Assistant, sb.ToString())).ConfigureAwait(false);
-    }
-
-    /// <inheritdoc/>
-    public async IAsyncEnumerable<ChatMessage> GetStreamingChatMessageAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
-    {
-        await foreach (var token in _stream)
-        {
-            yield return new LLamaSharpChatMessage(AuthorRole.Assistant, token);
-        }
-    }
-}
@@ -1,6 +1,5 @@
 using LLamaSharp.SemanticKernel.ChatCompletion;
-using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.ChatCompletion;

 namespace LLamaSharp.SemanticKernel;

 public static class ExtensionMethods
@@ -30,10 +30,11 @@
     <Platforms>AnyCPU;x64;Arm64</Platforms>
     <PackageId>LLamaSharp.semantic-kernel</PackageId>
     <Configurations>Debug;Release;GPU</Configurations>
+    <NoWarn>SKEXP0001,SKEXP0052</NoWarn>
   </PropertyGroup>

   <ItemGroup>
-    <PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.0.0-beta8" />
+    <PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.0.1" />
   </ItemGroup>

   <ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">
@@ -1,37 +1,47 @@
 using LLama.Abstractions;
 using LLamaSharp.SemanticKernel.ChatCompletion;
-using Microsoft.SemanticKernel.AI;
-using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.Services;
+using Microsoft.SemanticKernel.TextGeneration;
 using System.Runtime.CompilerServices;
+using System.Text;

 namespace LLamaSharp.SemanticKernel.TextCompletion;

-public sealed class LLamaSharpTextCompletion : ITextCompletion
+public sealed class LLamaSharpTextCompletion : ITextGenerationService
 {
     public ILLamaExecutor executor;

-    private readonly Dictionary<string, string> _attributes = new();
+    private readonly Dictionary<string, object?> _attributes = new();

-    public IReadOnlyDictionary<string, string> Attributes => this._attributes;
+    public IReadOnlyDictionary<string, object?> Attributes => this._attributes;

     public LLamaSharpTextCompletion(ILLamaExecutor executor)
     {
         this.executor = executor;
     }

-    public async Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, AIRequestSettings? requestSettings, CancellationToken cancellationToken = default)
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<TextContent>> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
     {
-        var settings = ChatRequestSettings.FromRequestSettings(requestSettings);
-        var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
-        return await Task.FromResult(new List<ITextResult> { new LLamaTextResult(result) }.AsReadOnly()).ConfigureAwait(false);
+        var settings = ChatRequestSettings.FromRequestSettings(executionSettings);
+        var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
+        var sb = new StringBuilder();
+        await foreach (var token in result)
+        {
+            sb.Append(token);
+        }
+        return new List<TextContent> { new(sb.ToString()) };
     }

-#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously.
-    public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, AIRequestSettings? requestSettings, [EnumeratorCancellation] CancellationToken cancellationToken = default)
-#pragma warning restore CS1998
+    /// <inheritdoc/>
+    public async IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
     {
-        var settings = ChatRequestSettings.FromRequestSettings(requestSettings);
-        var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
-        yield return new LLamaTextResult(result);
+        var settings = ChatRequestSettings.FromRequestSettings(executionSettings);
+        var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
+        await foreach (var token in result)
+        {
+            yield return new StreamingTextContent(token);
+        }
     }
 }
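
With `LLamaTextResult` gone, callers get `TextContent`/`StreamingTextContent` directly. A direct-use sketch of both entry points (the prompts are placeholders):

```csharp
using LLamaSharp.SemanticKernel.TextCompletion;

static async Task CompleteAsync(LLamaSharpTextCompletion service)
{
    // Non-streaming: one TextContent per completion.
    var contents = await service.GetTextContentsAsync("The capital of France is");
    Console.WriteLine(contents[0].Text);

    // Streaming: one StreamingTextContent per token.
    await foreach (var chunk in service.GetStreamingTextContentsAsync("Count to five:"))
    {
        Console.Write(chunk.Text);
    }
}
```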
@@ -1,37 +0,0 @@
-using Microsoft.SemanticKernel.AI.TextCompletion;
-using Microsoft.SemanticKernel.Orchestration;
-using System.Runtime.CompilerServices;
-using System.Text;
-
-namespace LLamaSharp.SemanticKernel.TextCompletion;
-
-internal sealed class LLamaTextResult : ITextResult, ITextStreamingResult
-{
-    private readonly IAsyncEnumerable<string> _text;
-
-    public LLamaTextResult(IAsyncEnumerable<string> text)
-    {
-        _text = text;
-        ModelResult = new(text);
-    }
-
-    public ModelResult ModelResult { get; }
-
-    public async Task<string> GetCompletionAsync(CancellationToken cancellationToken = default)
-    {
-        var sb = new StringBuilder();
-        await foreach (var token in _text)
-        {
-            sb.Append(token);
-        }
-        return await Task.FromResult(sb.ToString()).ConfigureAwait(false);
-    }
-
-    public async IAsyncEnumerable<string> GetCompletionStreamingAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
-    {
-        await foreach (string word in _text)
-        {
-            yield return word;
-        }
-    }
-}
@@ -1,15 +1,16 @@
 using LLama;
-using Microsoft.SemanticKernel.AI.Embeddings;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.Embeddings;

 namespace LLamaSharp.SemanticKernel.TextEmbedding;

-public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration
+public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGenerationService
 {
     private LLamaEmbedder _embedder;

-    private readonly Dictionary<string, string> _attributes = new();
+    private readonly Dictionary<string, object?> _attributes = new();

-    public IReadOnlyDictionary<string, string> Attributes => this._attributes;
+    public IReadOnlyDictionary<string, object?> Attributes => this._attributes;

     public LLamaSharpEmbeddingGeneration(LLamaEmbedder embedder)
     {
@@ -17,7 +18,7 @@ public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration
     }

     /// <inheritdoc/>
-    public async Task<IList<ReadOnlyMemory<float>>> GenerateEmbeddingsAsync(IList<string> data, CancellationToken cancellationToken = default)
+    public async Task<IList<ReadOnlyMemory<float>>> GenerateEmbeddingsAsync(IList<string> data, Kernel? kernel = null, CancellationToken cancellationToken = default)
     {
         var embeddings = data.Select(text => new ReadOnlyMemory<float>(_embedder.GetEmbeddings(text))).ToList();
         return await Task.FromResult(embeddings);
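
The only signature change here is the optional `Kernel?` parameter required by `ITextEmbeddingGenerationService`. A usage sketch, assuming an embedder built from preloaded weights as the `LLamaEmbedder` changes below require (path and texts are placeholders):

```csharp
using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel.TextEmbedding;

var parameters = new ModelParams(@"path/to/model.gguf") { EmbeddingMode = true };
using var weights = LLamaWeights.LoadFromFile(parameters);
using var embedder = new LLamaEmbedder(weights, parameters);

var service = new LLamaSharpEmbeddingGeneration(embedder);
var vectors = await service.GenerateEmbeddingsAsync(new[] { "hello", "world" });
Console.WriteLine(vectors[0].Length); // embedding dimensionality
```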
@@ -1,5 +1,5 @@
 using LLamaSharp.SemanticKernel.ChatCompletion;
-using Microsoft.SemanticKernel.AI;
+using Microsoft.SemanticKernel;

 namespace LLama.Unittest.SemanticKernel
 {
@@ -75,9 +75,9 @@ namespace LLama.Unittest.SemanticKernel
         public void ChatRequestSettings_FromAIRequestSettings()
         {
             // Arrange
-            var originalRequestSettings = new AIRequestSettings()
+            var originalRequestSettings = new PromptExecutionSettings()
             {
-                ServiceId = "test",
+                ModelId = "test",
             };

             // Act
@@ -85,16 +85,16 @@ namespace LLama.Unittest.SemanticKernel
             // Assert
             Assert.NotNull(requestSettings);
-            Assert.Equal(originalRequestSettings.ServiceId, requestSettings.ServiceId);
+            Assert.Equal(originalRequestSettings.ModelId, requestSettings.ModelId);
         }

         [Fact]
         public void ChatRequestSettings_FromAIRequestSettingsWithExtraPropertiesInSnakeCase()
         {
             // Arrange
-            var originalRequestSettings = new AIRequestSettings()
+            var originalRequestSettings = new PromptExecutionSettings()
             {
-                ServiceId = "test",
+                ModelId = "test",
                 ExtensionData = new Dictionary<string, object>
                 {
                     { "frequency_penalty", 0.5 },
@@ -131,9 +131,9 @@ namespace LLama.Unittest.SemanticKernel
         public void ChatRequestSettings_FromAIRequestSettingsWithExtraPropertiesInPascalCase()
         {
             // Arrange
-            var originalRequestSettings = new AIRequestSettings()
+            var originalRequestSettings = new PromptExecutionSettings()
             {
-                ServiceId = "test",
+                ModelId = "test",
                 ExtensionData = new Dictionary<string, object>
                 {
                     { "FrequencyPenalty", 0.5 },
@@ -20,30 +20,9 @@ namespace LLama
         public int EmbeddingSize => _ctx.EmbeddingSize;

         /// <summary>
-        /// Create a new embedder (loading temporary weights)
+        /// LLama Context
         /// </summary>
-        /// <param name="allParams"></param>
-        /// <param name="logger"></param>
-        [Obsolete("Preload LLamaWeights and use the constructor which accepts them")]
-        public LLamaEmbedder(ILLamaParams allParams, ILogger? logger = null)
-            : this(allParams, allParams, logger)
-        {
-        }
-
-        /// <summary>
-        /// Create a new embedder (loading temporary weights)
-        /// </summary>
-        /// <param name="modelParams"></param>
-        /// <param name="contextParams"></param>
-        /// <param name="logger"></param>
-        [Obsolete("Preload LLamaWeights and use the constructor which accepts them")]
-        public LLamaEmbedder(IModelParams modelParams, IContextParams contextParams, ILogger? logger = null)
-        {
-            using var weights = LLamaWeights.LoadFromFile(modelParams);
-            contextParams.EmbeddingMode = true;
-            _ctx = weights.CreateContext(contextParams, logger);
-        }
+        public LLamaContext Context => this._ctx;

         /// <summary>
         /// Create a new embedder, using the given LLamaWeights
@@ -117,5 +96,6 @@ namespace LLama
         {
             _ctx.Dispose();
         }
     }
 }
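
The obsolete constructors that loaded throwaway weights are removed, and the new `Context` property is what lets the KM generator above implement `CountTokens` without creating a second context. A sketch of the intended pattern (the model path is a placeholder):

```csharp
using LLama;
using LLama.Common;

var parameters = new ModelParams(@"path/to/model.gguf");
using var weights = LLamaWeights.LoadFromFile(parameters);   // preload once
using var embedder = new LLamaEmbedder(weights, parameters); // reuse the weights

// Exposing the context lets callers tokenize without extra plumbing.
int count = embedder.Context.Tokenize("how many tokens is this?").Length;
Console.WriteLine(count);
```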