
Merge pull request #356 from xbotter/deps/sk-rc3

bump sk to 1.0.1 & km to 0.18
tags/0.9.1
Martin Evans, 2 years ago, commit f0d7468b22
20 changed files with 163 additions and 252 deletions

 1. LLama.Examples/Examples/KernelMemory.cs (+6 -0)
 2. LLama.Examples/Examples/SemanticKernelChat.cs (+7 -6)
 3. LLama.Examples/Examples/SemanticKernelPrompt.cs (+9 -9)
 4. LLama.Examples/LLama.Examples.csproj (+6 -19)
 5. LLama.KernelMemory/BuilderExtensions.cs (+13 -23)
 6. LLama.KernelMemory/LLamaSharp.KernelMemory.csproj (+1 -3)
 7. LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs (+22 -9)
 8. LLama.KernelMemory/LlamaSharpTextGenerator.cs (+12 -5)
 9. LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs (+3 -3)
10. LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs (+4 -5)
11. LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs (+35 -21)
12. LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs (+0 -14)
13. LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs (+0 -44)
14. LLama.SemanticKernel/ExtensionMethods.cs (+1 -2)
15. LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj (+2 -1)
16. LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs (+25 -15)
17. LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs (+0 -37)
18. LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs (+6 -5)
19. LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs (+8 -8)
20. LLama/LLamaEmbedder.cs (+3 -23)

LLama.Examples/Examples/KernelMemory.cs (+6 -0)

@@ -16,6 +16,11 @@ namespace LLama.Examples.Examples
 Console.WriteLine("Example from: https://github.com/microsoft/kernel-memory/blob/main/examples/101-using-core-nuget/Program.cs");
 Console.Write("Please input your model path: ");
 var modelPath = Console.ReadLine();
+var searchClientConfig = new SearchClientConfig
+{
+    MaxMatchesCount = 1,
+    AnswerTokens = 100,
+};
 var memory = new KernelMemoryBuilder()
     .WithLLamaSharpDefaults(new LLamaSharpConfig(modelPath)
     {
@@ -24,6 +29,7 @@ namespace LLama.Examples.Examples
         AntiPrompts = new List<string> { "\n\n" }
     }
     })
+    .WithSearchClientConfig(searchClientConfig)
     .With(new TextPartitioningOptions
     {
         MaxTokensPerParagraph = 300,

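For orientation, the resulting setup reads roughly as follows. This is a sketch rather than part of the diff: the model path is a placeholder, and it assumes the SearchClientConfig type from Kernel Memory 0.18 as used above.

    // Sketch only: "model.gguf" is a placeholder path.
    var searchClientConfig = new SearchClientConfig
    {
        MaxMatchesCount = 1, // return only the single best match
        AnswerTokens = 100,  // cap the generated answer length
    };
    var builder = new KernelMemoryBuilder()
        .WithLLamaSharpDefaults(new LLamaSharpConfig("model.gguf"))
        .WithSearchClientConfig(searchClientConfig);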

LLama.Examples/Examples/SemanticKernelChat.cs (+7 -6)

@@ -2,6 +2,7 @@
 using LLama.Common;
 using Microsoft.SemanticKernel.AI.ChatCompletion;
 using LLamaSharp.SemanticKernel.ChatCompletion;
+using Microsoft.SemanticKernel.ChatCompletion;
 
 namespace LLama.Examples.Examples
 {
@@ -9,7 +10,7 @@ namespace LLama.Examples.Examples
 {
 public static async Task Run()
 {
-Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
+Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs");
 Console.Write("Please input your model path: ");
 var modelPath = Console.ReadLine();
 
@@ -29,8 +30,8 @@ namespace LLama.Examples.Examples
 await MessageOutputAsync(chatHistory);
 
 // First bot assistant message
-string reply = await chatGPT.GenerateMessageAsync(chatHistory);
-chatHistory.AddAssistantMessage(reply);
+var reply = await chatGPT.GetChatMessageContentAsync(chatHistory);
+chatHistory.AddAssistantMessage(reply.Content);
 await MessageOutputAsync(chatHistory);
 
 // Second user message
@@ -38,15 +39,15 @@ namespace LLama.Examples.Examples
 await MessageOutputAsync(chatHistory);
 
 // Second bot assistant message
-reply = await chatGPT.GenerateMessageAsync(chatHistory);
-chatHistory.AddAssistantMessage(reply);
+reply = await chatGPT.GetChatMessageContentAsync(chatHistory);
+chatHistory.AddAssistantMessage(reply.Content);
 await MessageOutputAsync(chatHistory);
 }
 
 /// <summary>
 /// Outputs the last message of the chat history
 /// </summary>
-private static Task MessageOutputAsync(Microsoft.SemanticKernel.AI.ChatCompletion.ChatHistory chatHistory)
+private static Task MessageOutputAsync(Microsoft.SemanticKernel.ChatCompletion.ChatHistory chatHistory)
 {
 var message = chatHistory.Last();
 

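This change reflects SK 1.0's replacement of IChatCompletion.GenerateMessageAsync with IChatCompletionService.GetChatMessageContentAsync, which returns a ChatMessageContent rather than a raw string. A minimal sketch of the new call shape, assuming chatGPT is an LLamaSharpChatCompletion built elsewhere:

    // Sketch: reply is a ChatMessageContent; its Content property holds the text.
    var chatHistory = chatGPT.CreateNewChat("You are a helpful assistant.");
    chatHistory.AddUserMessage("Hi, what is 1 + 1?");
    var reply = await chatGPT.GetChatMessageContentAsync(chatHistory);
    chatHistory.AddAssistantMessage(reply.Content);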

LLama.Examples/Examples/SemanticKernelPrompt.cs (+9 -9)

@@ -2,8 +2,9 @@
 using LLama.Common;
 using LLamaSharp.SemanticKernel.ChatCompletion;
 using Microsoft.SemanticKernel;
-using Microsoft.SemanticKernel.AI.TextCompletion;
 using LLamaSharp.SemanticKernel.TextCompletion;
+using Microsoft.SemanticKernel.TextGeneration;
+using Microsoft.Extensions.DependencyInjection;
 
 namespace LLama.Examples.Examples
 {
@@ -11,7 +12,7 @@ namespace LLama.Examples.Examples
 {
 public static async Task Run()
 {
-Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs");
+Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
 Console.Write("Please input your model path: ");
 var modelPath = Console.ReadLine();
 
@@ -20,8 +21,8 @@ namespace LLama.Examples.Examples
 using var model = LLamaWeights.LoadFromFile(parameters);
 var ex = new StatelessExecutor(model, parameters);
 
-var builder = new KernelBuilder();
-builder.WithAIService<ITextCompletion>("local-llama", new LLamaSharpTextCompletion(ex), true);
+var builder = Kernel.CreateBuilder();
+builder.Services.AddKeyedSingleton<ITextGenerationService>("local-llama", new LLamaSharpTextCompletion(ex));
 
 var kernel = builder.Build();
 
@@ -29,8 +30,8 @@ namespace LLama.Examples.Examples
 
 One line TLDR with the fewest words.";
 
-ChatRequestSettings settings = new() {MaxTokens = 100};
-var summarize = kernel.CreateSemanticFunction(prompt, requestSettings: settings);
+ChatRequestSettings settings = new() { MaxTokens = 100 };
+var summarize = kernel.CreateFunctionFromPrompt(prompt, settings);
 
 string text1 = @"
 1st Law of Thermodynamics - Energy cannot be created or destroyed.
@@ -42,10 +43,9 @@ One line TLDR with the fewest words.";
 2. The acceleration of an object depends on the mass of the object and the amount of force applied.
 3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite on the first.";
 
-Console.WriteLine((await kernel.RunAsync(text1, summarize)).GetValue<string>());
+Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text1 })).GetValue<string>());
 
-Console.WriteLine((await kernel.RunAsync(text2, summarize)).GetValue<string>());
+Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text2 })).GetValue<string>());
 }
 }
 }

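The key migration here is from KernelBuilder.WithAIService to keyed dependency injection, and from CreateSemanticFunction/RunAsync to CreateFunctionFromPrompt/InvokeAsync with named arguments. A condensed sketch, assuming ex is a configured StatelessExecutor as above:

    // Sketch of the SK 1.0 registration and invocation pattern.
    var builder = Kernel.CreateBuilder();
    builder.Services.AddKeyedSingleton<ITextGenerationService>("local-llama", new LLamaSharpTextCompletion(ex));
    var kernel = builder.Build();

    var summarize = kernel.CreateFunctionFromPrompt("{{$input}}\n\nOne line TLDR with the fewest words.",
        new ChatRequestSettings { MaxTokens = 100 });
    var result = await kernel.InvokeAsync(summarize, new() { ["input"] = "some long text" });
    Console.WriteLine(result.GetValue<string>());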
LLama.Examples/LLama.Examples.csproj (+6 -19)

@@ -1,4 +1,4 @@
-<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
 <Import Project="..\LLama\LLamaSharp.Runtime.targets" />
 <PropertyGroup>
 <OutputType>Exe</OutputType>
@@ -9,28 +9,15 @@
 <!-- Set IncludeBuiltInRuntimes to false to include your own runtime libraries and not link the defaults -->
 <IncludeBuiltInRuntimes>true</IncludeBuiltInRuntimes>
 <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-</PropertyGroup>
-
-<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
-<NoWarn>1701;1702;8604</NoWarn>
-</PropertyGroup>
-
-<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
-<NoWarn>1701;1702;8604</NoWarn>
-</PropertyGroup>
-
-<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-<NoWarn>1701;1702;8604</NoWarn>
-</PropertyGroup>
-
-<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-<NoWarn>1701;1702;8604</NoWarn>
+<LangVersion>12</LangVersion>
+<NoWarn>1701;1702;8604;SKEXP0001;SKEXP0052;SKEXP0003</NoWarn>
 </PropertyGroup>
 
 <ItemGroup>
 <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="8.0.0" />
-<PackageReference Include="Microsoft.KernelMemory.Core" Version="0.12.231123.1-preview" />
-<PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta8" />
+<PackageReference Include="Microsoft.KernelMemory.Core" Version="0.18.231209.1-preview" />
+<PackageReference Include="Microsoft.SemanticKernel" Version="1.0.1" />
+<PackageReference Include="Microsoft.SemanticKernel.Plugins.Memory" Version="1.0.1-alpha" />
 <PackageReference Include="Spectre.Console" Version="0.48.0" />
 </ItemGroup>



LLama.KernelMemory/BuilderExtensions.cs (+13 -23)

@@ -17,19 +17,6 @@ namespace LLamaSharp.KernelMemory
 public static class BuilderExtensions
 {
 
-private static IKernelMemoryBuilder WithCustomEmbeddingGeneration(this IKernelMemoryBuilder builder, ITextEmbeddingGeneration embeddingGeneration)
-{
-builder.AddSingleton<ITextEmbeddingGeneration>(embeddingGeneration);
-builder.AddIngestionEmbeddingGenerator(embeddingGeneration);
-return builder;
-}
-
-private static IKernelMemoryBuilder WithCustomTextGeneration(this IKernelMemoryBuilder builder, ITextGeneration textGeneration)
-{
-builder.AddSingleton<ITextGeneration>(textGeneration);
-return builder;
-}
-
 /// <summary>
 /// Adds LLamaSharpTextEmbeddingGeneration to the KernelMemoryBuilder.
 /// </summary>
@@ -38,7 +25,9 @@ namespace LLamaSharp.KernelMemory
 /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration added.</returns>
 public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpConfig config)
 {
-builder.WithCustomEmbeddingGeneration(new LLamaSharpTextEmbeddingGeneration(config));
+var generator = new LLamaSharpTextEmbeddingGenerator(config);
+builder.AddSingleton<ITextEmbeddingGenerator>(generator);
+builder.AddIngestionEmbeddingGenerator(generator);
 return builder;
 }
 
@@ -46,11 +35,12 @@ namespace LLamaSharp.KernelMemory
 /// Adds LLamaSharpTextEmbeddingGeneration to the KernelMemoryBuilder.
 /// </summary>
 /// <param name="builder">The KernelMemoryBuilder instance.</param>
-/// <param name="textEmbeddingGeneration">The LLamaSharpTextEmbeddingGeneration instance.</param>
+/// <param name="textEmbeddingGenerator">The LLamaSharpTextEmbeddingGeneration instance.</param>
 /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration added.</returns>
-public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpTextEmbeddingGeneration textEmbeddingGeneration)
+public static IKernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this IKernelMemoryBuilder builder, LLamaSharpTextEmbeddingGenerator textEmbeddingGenerator)
 {
-builder.WithCustomEmbeddingGeneration(textEmbeddingGeneration);
+builder.AddSingleton<ITextEmbeddingGenerator>(textEmbeddingGenerator);
+builder.AddIngestionEmbeddingGenerator(textEmbeddingGenerator);
 return builder;
 }
 
@@ -62,7 +52,7 @@ namespace LLamaSharp.KernelMemory
 /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextGeneration added.</returns>
 public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LLamaSharpConfig config)
 {
-builder.WithCustomTextGeneration(new LlamaSharpTextGeneration(config));
+builder.AddSingleton<ITextGenerator>(new LlamaSharpTextGenerator(config));
 return builder;
 }
 
@@ -70,11 +60,11 @@ namespace LLamaSharp.KernelMemory
 /// Adds LLamaSharpTextGeneration to the KernelMemoryBuilder.
 /// </summary>
 /// <param name="builder">The KernelMemoryBuilder instance.</param>
-/// <param name="textGeneration">The LlamaSharpTextGeneration instance.</param>
+/// <param name="textGenerator">The LlamaSharpTextGeneration instance.</param>
 /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextGeneration added.</returns>
-public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LlamaSharpTextGeneration textGeneration)
+public static IKernelMemoryBuilder WithLLamaSharpTextGeneration(this IKernelMemoryBuilder builder, LlamaSharpTextGenerator textGenerator)
 {
-builder.WithCustomTextGeneration(textGeneration);
+builder.AddSingleton<ITextGenerator>(textGenerator);
 return builder;
 }
 
@@ -96,8 +86,8 @@ namespace LLamaSharp.KernelMemory
 var context = weights.CreateContext(parameters);
 var executor = new StatelessExecutor(weights, parameters);
 var embedder = new LLamaEmbedder(weights, parameters);
-builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGeneration(embedder));
-builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGeneration(weights, context, executor, config?.DefaultInferenceParams));
+builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGenerator(embedder));
+builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGenerator(weights, context, executor, config?.DefaultInferenceParams));
 return builder;
 }
 }

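With the private WithCustomEmbeddingGeneration/WithCustomTextGeneration helpers removed, each public extension now registers its generator directly via AddSingleton (plus AddIngestionEmbeddingGenerator for embeddings). Typical usage is unchanged; a sketch with a placeholder model path:

    // Sketch: both generators wired from a single config.
    var config = new LLamaSharpConfig("model.gguf"); // placeholder path
    var builder = new KernelMemoryBuilder()
        .WithLLamaSharpTextEmbeddingGeneration(config)
        .WithLLamaSharpTextGeneration(config);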

LLama.KernelMemory/LLamaSharp.KernelMemory.csproj (+1 -3)

@@ -4,8 +4,6 @@
 <TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks>
 <ImplicitUsings>enable</ImplicitUsings>
 <Nullable>enable</Nullable>
-
-<Version>0.7.1</Version>
+<Version>0.8.0</Version>
 <Authors>Xbotter</Authors>
 <Company>SciSharp STACK</Company>
@@ -29,7 +27,7 @@
 </PropertyGroup>
 
 <ItemGroup>
-<PackageReference Include="Microsoft.KernelMemory.Abstractions" Version="0.12.231123.1-preview" />
+<PackageReference Include="Microsoft.KernelMemory.Abstractions" Version="0.18.231209.1-preview" />
 </ItemGroup>
 
 <ItemGroup>


LLama.KernelMemory/LLamaSharpTextEmbeddingGeneration.cs → LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs (+22 -9)

@@ -1,6 +1,8 @@
 using LLama;
 using LLama.Abstractions;
 using LLama.Common;
+using Microsoft.KernelMemory;
+using Microsoft.KernelMemory.AI;
 using Microsoft.SemanticKernel.AI.Embeddings;
 using System;
 using System.Collections.Generic;
@@ -13,22 +15,23 @@ namespace LLamaSharp.KernelMemory
 /// <summary>
 /// Provides text embedding generation for LLamaSharp.
 /// </summary>
-public class LLamaSharpTextEmbeddingGeneration : ITextEmbeddingGeneration, IDisposable
+public class LLamaSharpTextEmbeddingGenerator
+    : ITextEmbeddingGenerator, IDisposable
 {
 private readonly LLamaSharpConfig? _config;
 private readonly LLamaWeights? _weights;
 private readonly LLamaEmbedder _embedder;
 private bool _ownsEmbedder = false;
 private bool _ownsWeights = false;
-private readonly Dictionary<string, string> _attributes = new();
 
-public IReadOnlyDictionary<string, string> Attributes => this._attributes;
+/// <inheritdoc/>
+public int MaxTokens => (int?)_config?.ContextSize ?? 2048;
 
 /// <summary>
-/// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGeneration"/> class.
+/// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGenerator"/> class.
 /// </summary>
 /// <param name="config">The configuration for LLamaSharp.</param>
-public LLamaSharpTextEmbeddingGeneration(LLamaSharpConfig config)
+public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
 {
 this._config = config;
 var @params = new ModelParams(_config.ModelPath);
@@ -39,11 +42,11 @@ namespace LLamaSharp.KernelMemory
 }
 
 /// <summary>
-/// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGeneration"/> class from reused weights.
+/// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGenerator"/> class from reused weights.
 /// </summary>
 /// <param name="config">The configuration for LLamaSharp.</param>
 /// <param name="weights">A LLamaWeights object.</param>
-public LLamaSharpTextEmbeddingGeneration(LLamaSharpConfig config, LLamaWeights weights)
+public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights weights)
 {
 this._config = config;
 var @params = new ModelParams(_config.ModelPath);
@@ -53,10 +56,10 @@ namespace LLamaSharp.KernelMemory
 }
 
 /// <summary>
-/// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGeneration"/> class from reused embedder.
+/// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGenerator"/> class from reused embedder.
 /// </summary>
 /// <param name="embedder">A LLamaEmbedder object.</param>
-public LLamaSharpTextEmbeddingGeneration(LLamaEmbedder embedder)
+public LLamaSharpTextEmbeddingGenerator(LLamaEmbedder embedder)
 {
 this._config = null;
 this._weights = null;
@@ -89,5 +92,15 @@ namespace LLamaSharp.KernelMemory
 
 return Task.FromResult(results);
 }
+
+/// <inheritdoc/>
+public Task<Embedding> GenerateEmbeddingAsync(string text, CancellationToken cancellationToken = default)
+{
+var embeddings = _embedder.GetEmbeddings(text);
+return Task.FromResult(new Embedding(embeddings));
+}
+
+/// <inheritdoc/>
+public int CountTokens(string text) => _embedder.Context.Tokenize(text).Length;
 }
 }

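Kernel Memory 0.18's ITextEmbeddingGenerator adds a single-text GenerateEmbeddingAsync (returning KM's Embedding type) plus MaxTokens and CountTokens, which the class now satisfies as shown above. A usage sketch with a placeholder model path:

    // Sketch: single-text embedding plus token accounting.
    using var generator = new LLamaSharpTextEmbeddingGenerator(new LLamaSharpConfig("model.gguf"));
    var embedding = await generator.GenerateEmbeddingAsync("hello world");
    var tokens = generator.CountTokens("hello world"); // tokenized via the embedder's context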
LLama.KernelMemory/LlamaSharpTextGeneration.cs → LLama.KernelMemory/LlamaSharpTextGenerator.cs (+12 -5)

@@ -13,7 +13,7 @@ namespace LLamaSharp.KernelMemory
 /// <summary>
 /// Provides text generation for LLamaSharp.
 /// </summary>
-public class LlamaSharpTextGeneration : ITextGeneration, IDisposable
+public class LlamaSharpTextGenerator : ITextGenerator, IDisposable
 {
 private readonly LLamaWeights _weights;
 private readonly StatelessExecutor _executor;
@@ -22,11 +22,13 @@ namespace LLamaSharp.KernelMemory
 private bool _ownsContext = false;
 private bool _ownsWeights = false;
 
+public int MaxTokenTotal { get; }
+
 /// <summary>
-/// Initializes a new instance of the <see cref="LlamaSharpTextGeneration"/> class.
+/// Initializes a new instance of the <see cref="LlamaSharpTextGenerator"/> class.
 /// </summary>
 /// <param name="config">The configuration for LLamaSharp.</param>
-public LlamaSharpTextGeneration(LLamaSharpConfig config)
+public LlamaSharpTextGenerator(LLamaSharpConfig config)
 {
 var parameters = new ModelParams(config.ModelPath)
 {
@@ -39,21 +41,23 @@ namespace LLamaSharp.KernelMemory
 _executor = new StatelessExecutor(_weights, parameters);
 _defaultInferenceParams = config?.DefaultInferenceParams;
 _ownsWeights = _ownsContext = true;
+MaxTokenTotal = (int)parameters.ContextSize;
 }
 
 /// <summary>
-/// Initializes a new instance of the <see cref="LlamaSharpTextGeneration"/> class from reused weights, context and executor.
+/// Initializes a new instance of the <see cref="LlamaSharpTextGenerator"/> class from reused weights, context and executor.
 /// If executor is not specified, then a StatelessExecutor will be created with `context.Params`. So far only `StatelessExecutor` is expected.
 /// </summary>
 /// <param name="weights">A LLamaWeights object.</param>
 /// <param name="context">A LLamaContext object.</param>
 /// <param name="executor">An executor. Currently only StatelessExecutor is expected.</param>
-public LlamaSharpTextGeneration(LLamaWeights weights, LLamaContext context, StatelessExecutor? executor = null, InferenceParams? inferenceParams = null)
+public LlamaSharpTextGenerator(LLamaWeights weights, LLamaContext context, StatelessExecutor? executor = null, InferenceParams? inferenceParams = null)
 {
 _weights = weights;
 _context = context;
 _executor = executor ?? new StatelessExecutor(_weights, _context.Params);
 _defaultInferenceParams = inferenceParams;
+MaxTokenTotal = (int)_context.Params.ContextSize;
 }
 
 /// <inheritdoc/>
@@ -102,5 +106,8 @@ namespace LLamaSharp.KernelMemory
 };
 }
 }
+
+/// <inheritdoc/>
+public int CountTokens(string text) => _context.Tokenize(text).Length;
 }
 }

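Similarly, ITextGenerator requires MaxTokenTotal and CountTokens, which the generator now derives from its context size and tokenizer. That lets callers budget prompts before inference, roughly:

    // Sketch: checking a prompt against the context budget ("model.gguf" is a placeholder).
    using var generator = new LlamaSharpTextGenerator(new LLamaSharpConfig("model.gguf"));
    var prompt = "Describe the three laws of motion.";
    int used = generator.CountTokens(prompt);
    int remaining = generator.MaxTokenTotal - used; // MaxTokenTotal mirrors ContextSize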
LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs (+3 -3)

@@ -1,10 +1,10 @@
-using Microsoft.SemanticKernel.AI;
+using Microsoft.SemanticKernel;
 using System.Text.Json;
 using System.Text.Json.Serialization;
 
 namespace LLamaSharp.SemanticKernel.ChatCompletion;
 
-public class ChatRequestSettings : AIRequestSettings
+public class ChatRequestSettings : PromptExecutionSettings
 {
 /// <summary>
 /// Temperature controls the randomness of the completion.
@@ -68,7 +68,7 @@ public class ChatRequestSettings : AIRequestSettings
 /// <param name="requestSettings">Template configuration</param>
 /// <param name="defaultMaxTokens">Default max tokens</param>
 /// <returns>An instance of OpenAIRequestSettings</returns>
-public static ChatRequestSettings FromRequestSettings(AIRequestSettings? requestSettings, int? defaultMaxTokens = null)
+public static ChatRequestSettings FromRequestSettings(PromptExecutionSettings? requestSettings, int? defaultMaxTokens = null)
 {
 if (requestSettings is null)
 {

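FromRequestSettings still accepts any generic settings object, now typed as SK 1.0's PromptExecutionSettings base class; for example:

    // Sketch: adapting generic SK settings into LLamaSharp-specific ones.
    PromptExecutionSettings generic = new() { ModelId = "test" };
    var settings = ChatRequestSettings.FromRequestSettings(generic, defaultMaxTokens: 256);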

LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs (+4 -5)

@@ -31,6 +31,10 @@ public class ChatRequestSettingsConverter : JsonConverter<ChatRequestSettings>
 
 switch (propertyName)
 {
+case "MODELID":
+case "MODEL_ID":
+requestSettings.ModelId = reader.GetString();
+break;
 case "TEMPERATURE":
 requestSettings.Temperature = reader.GetDouble();
 break;
@@ -62,10 +66,6 @@ public class ChatRequestSettingsConverter : JsonConverter<ChatRequestSettings>
 case "TOKEN_SELECTION_BIASES":
 requestSettings.TokenSelectionBiases = JsonSerializer.Deserialize<IDictionary<int, int>>(ref reader, options) ?? new Dictionary<int, int>();
 break;
-case "SERVICEID":
-case "SERVICE_ID":
-requestSettings.ServiceId = reader.GetString();
-break;
 default:
 reader.Skip();
 break;
@@ -98,7 +98,6 @@ public class ChatRequestSettingsConverter : JsonConverter<ChatRequestSettings>
 writer.WriteNumber("results_per_prompt", value.ResultsPerPrompt);
 writer.WritePropertyName("token_selection_biases");
 JsonSerializer.Serialize(writer, value.TokenSelectionBiases, options);
-writer.WriteString("service_id", value.ServiceId);
 
 writer.WriteEndObject();
 }

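In JSON terms, the converter now accepts model_id (in either casing) and no longer reads or writes service_id. A deserialization sketch under those assumptions:

    // Sketch: deserializing with the updated converter registered.
    var options = new JsonSerializerOptions();
    options.Converters.Add(new ChatRequestSettingsConverter());
    var settings = JsonSerializer.Deserialize<ChatRequestSettings>(
        "{ \"model_id\": \"local-llama\", \"temperature\": 0.7 }", options);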
LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs (+35 -21)

@@ -1,8 +1,12 @@
 using LLama;
 using LLama.Abstractions;
-using Microsoft.SemanticKernel.AI;
-using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.ChatCompletion;
+using Microsoft.SemanticKernel.Services;
+using System;
+using System.IO;
 using System.Runtime.CompilerServices;
+using System.Text;
 using static LLama.LLamaTransforms;
 
 namespace LLamaSharp.SemanticKernel.ChatCompletion;
@@ -10,16 +14,16 @@ namespace LLamaSharp.SemanticKernel.ChatCompletion;
 /// <summary>
 /// LLamaSharp ChatCompletion
 /// </summary>
-public sealed class LLamaSharpChatCompletion : IChatCompletion
+public sealed class LLamaSharpChatCompletion : IChatCompletionService
 {
 private readonly StatelessExecutor _model;
 private ChatRequestSettings defaultRequestSettings;
 private readonly IHistoryTransform historyTransform;
 private readonly ITextStreamTransform outputTransform;
 
-private readonly Dictionary<string, string> _attributes = new();
+private readonly Dictionary<string, object?> _attributes = new();
 
-public IReadOnlyDictionary<string, string> Attributes => this._attributes;
+public IReadOnlyDictionary<string, object?> Attributes => this._attributes;
 
 static ChatRequestSettings GetDefaultSettings()
 {
@@ -45,7 +49,6 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion
 $"{LLama.Common.AuthorRole.System}:"});
 }
 
-/// <inheritdoc/>
 public ChatHistory CreateNewChat(string? instructions = "")
 {
 var history = new ChatHistory();
@@ -59,30 +62,41 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion
 }
 
 /// <inheritdoc/>
-public Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
+public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
 {
-var settings = requestSettings != null
-? ChatRequestSettings.FromRequestSettings(requestSettings)
-: defaultRequestSettings;
-var prompt = historyTransform.HistoryToText(chat.ToLLamaSharpChatHistory());
+var settings = executionSettings != null
+? ChatRequestSettings.FromRequestSettings(executionSettings)
+: defaultRequestSettings;
+var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory());
 
 var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken);
 
-return Task.FromResult<IReadOnlyList<IChatResult>>(new List<IChatResult> { new LLamaSharpChatResult(outputTransform.TransformAsync(result)) }.AsReadOnly());
+var output = outputTransform.TransformAsync(result);
+
+var sb = new StringBuilder();
+await foreach (var token in output)
+{
+sb.Append(token);
+}
+
+return new List<ChatMessageContent> { new(AuthorRole.Assistant, sb.ToString()) }.AsReadOnly();
 }
 
 /// <inheritdoc/>
-#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously.
-public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
-#pragma warning restore CS1998
+public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
 {
-var settings = requestSettings != null
-? ChatRequestSettings.FromRequestSettings(requestSettings)
-: defaultRequestSettings;
-var prompt = historyTransform.HistoryToText(chat.ToLLamaSharpChatHistory());
-// This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable.
+var settings = executionSettings != null
+? ChatRequestSettings.FromRequestSettings(executionSettings)
+: defaultRequestSettings;
+var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory());
 var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken);
 
-yield return new LLamaSharpChatResult(outputTransform.TransformAsync(result));
+var output = outputTransform.TransformAsync(result);
+
+await foreach (var token in output)
+{
+yield return new StreamingChatMessageContent(AuthorRole.Assistant, token);
+}
 }
 }

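Because the IChatResult/IChatStreamingResult wrappers are deleted below, both methods now consume the transformed token stream directly: the buffered method concatenates tokens into a single ChatMessageContent, while the streaming method yields one StreamingChatMessageContent per token. Consuming the streaming path looks roughly like this ("chat" and "history" are assumed set up elsewhere):

    // Sketch: "chat" is an LLamaSharpChatCompletion, "history" a populated ChatHistory.
    await foreach (var chunk in chat.GetStreamingChatMessageContentsAsync(history))
    {
        Console.Write(chunk.Content); // one transformed token per chunk
    }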
LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs (+0 -14)

@@ -1,14 +0,0 @@
-using Microsoft.SemanticKernel.AI.ChatCompletion;
-
-namespace LLamaSharp.SemanticKernel.ChatCompletion;
-
-/// <summary>
-/// LLamaSharp Chat Message
-/// </summary>
-public class LLamaSharpChatMessage : ChatMessage
-{
-/// <inheritdoc/>
-public LLamaSharpChatMessage(AuthorRole role, string content) : base(role, content)
-{
-}
-}

LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs (+0 -44)

@@ -1,44 +0,0 @@
-using Microsoft.SemanticKernel.AI.ChatCompletion;
-using Microsoft.SemanticKernel.Orchestration;
-using System.Runtime.CompilerServices;
-using System.Text;
-
-namespace LLamaSharp.SemanticKernel.ChatCompletion;
-
-internal sealed class LLamaSharpChatResult : IChatResult, IChatStreamingResult
-{
-private readonly ModelResult _modelResult;
-private readonly IAsyncEnumerable<string> _stream;
-
-/// <summary>
-///
-/// </summary>
-/// <param name="stream"></param>
-public LLamaSharpChatResult(IAsyncEnumerable<string> stream)
-{
-_stream = stream;
-this._modelResult = new ModelResult(stream);
-}
-
-public ModelResult ModelResult => this._modelResult;
-
-/// <inheritdoc/>
-public async Task<ChatMessage> GetChatMessageAsync(CancellationToken cancellationToken = default)
-{
-var sb = new StringBuilder();
-await foreach (var token in _stream)
-{
-sb.Append(token);
-}
-return await Task.FromResult(new LLamaSharpChatMessage(AuthorRole.Assistant, sb.ToString())).ConfigureAwait(false);
-}
-
-/// <inheritdoc/>
-public async IAsyncEnumerable<ChatMessage> GetStreamingChatMessageAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
-{
-await foreach (var token in _stream)
-{
-yield return new LLamaSharpChatMessage(AuthorRole.Assistant, token);
-}
-}
-}

LLama.SemanticKernel/ExtensionMethods.cs (+1 -2)

@@ -1,6 +1,5 @@
 using LLamaSharp.SemanticKernel.ChatCompletion;
-using Microsoft.SemanticKernel.AI.ChatCompletion;
-
+using Microsoft.SemanticKernel.ChatCompletion;
 namespace LLamaSharp.SemanticKernel;
 
 public static class ExtensionMethods


LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj (+2 -1)

@@ -30,10 +30,11 @@
 <Platforms>AnyCPU;x64;Arm64</Platforms>
 <PackageId>LLamaSharp.semantic-kernel</PackageId>
 <Configurations>Debug;Release;GPU</Configurations>
+<NoWarn>SKEXP0001,SKEXP0052</NoWarn>
 </PropertyGroup>
 
 <ItemGroup>
-<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.0.0-beta8" />
+<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.0.1" />
 </ItemGroup>
 
 <ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">


LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs (+25 -15)

@@ -1,37 +1,47 @@
 using LLama.Abstractions;
 using LLamaSharp.SemanticKernel.ChatCompletion;
-using Microsoft.SemanticKernel.AI;
-using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.Services;
+using Microsoft.SemanticKernel.TextGeneration;
 using System.Runtime.CompilerServices;
+using System.Text;
 
 namespace LLamaSharp.SemanticKernel.TextCompletion;
 
-public sealed class LLamaSharpTextCompletion : ITextCompletion
+public sealed class LLamaSharpTextCompletion : ITextGenerationService
 {
 public ILLamaExecutor executor;
 
-private readonly Dictionary<string, string> _attributes = new();
+private readonly Dictionary<string, object?> _attributes = new();
 
-public IReadOnlyDictionary<string, string> Attributes => this._attributes;
+public IReadOnlyDictionary<string, object?> Attributes => this._attributes;
 
 public LLamaSharpTextCompletion(ILLamaExecutor executor)
 {
 this.executor = executor;
 }
 
-public async Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, AIRequestSettings? requestSettings, CancellationToken cancellationToken = default)
+/// <inheritdoc/>
+public async Task<IReadOnlyList<TextContent>> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
 {
-var settings = ChatRequestSettings.FromRequestSettings(requestSettings);
-var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
-return await Task.FromResult(new List<ITextResult> { new LLamaTextResult(result) }.AsReadOnly()).ConfigureAwait(false);
+var settings = ChatRequestSettings.FromRequestSettings(executionSettings);
+var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
+var sb = new StringBuilder();
+await foreach (var token in result)
+{
+sb.Append(token);
+}
+return new List<TextContent> { new(sb.ToString()) };
 }
 
-#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously.
-public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, AIRequestSettings? requestSettings,[EnumeratorCancellation] CancellationToken cancellationToken = default)
-#pragma warning restore CS1998
+/// <inheritdoc/>
+public async IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
 {
-var settings = ChatRequestSettings.FromRequestSettings(requestSettings);
-var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
-yield return new LLamaTextResult(result);
+var settings = ChatRequestSettings.FromRequestSettings(executionSettings);
+var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
+await foreach (var token in result)
+{
+yield return new StreamingTextContent(token);
+}
 }
 }

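The same buffering/streaming split applies to text generation. A sketch of both entry points, assuming service is an LLamaSharpTextCompletion constructed elsewhere:

    // Buffered: a single TextContent with the whole completion.
    var texts = await service.GetTextContentsAsync("Hello");
    Console.WriteLine(texts[0].Text);

    // Streamed: one StreamingTextContent per token.
    await foreach (var chunk in service.GetStreamingTextContentsAsync("Hello"))
    {
        Console.Write(chunk.Text);
    }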
LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs (+0 -37)

@@ -1,37 +0,0 @@
-using Microsoft.SemanticKernel.AI.TextCompletion;
-using Microsoft.SemanticKernel.Orchestration;
-using System.Runtime.CompilerServices;
-using System.Text;
-
-namespace LLamaSharp.SemanticKernel.TextCompletion;
-
-internal sealed class LLamaTextResult : ITextResult, ITextStreamingResult
-{
-private readonly IAsyncEnumerable<string> _text;
-
-public LLamaTextResult(IAsyncEnumerable<string> text)
-{
-_text = text;
-ModelResult = new(text);
-}
-
-public ModelResult ModelResult { get; }
-
-public async Task<string> GetCompletionAsync(CancellationToken cancellationToken = default)
-{
-var sb = new StringBuilder();
-await foreach (var token in _text)
-{
-sb.Append(token);
-}
-return await Task.FromResult(sb.ToString()).ConfigureAwait(false);
-}
-
-public async IAsyncEnumerable<string> GetCompletionStreamingAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
-{
-await foreach (string word in _text)
-{
-yield return word;
-}
-}
-}

LLama.SemanticKernel/TextEmbedding/LLamaSharpEmbeddingGeneration.cs (+6 -5)

@@ -1,15 +1,16 @@
 using LLama;
-using Microsoft.SemanticKernel.AI.Embeddings;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.Embeddings;
 
 namespace LLamaSharp.SemanticKernel.TextEmbedding;
 
-public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration
+public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGenerationService
 {
 private LLamaEmbedder _embedder;
 
-private readonly Dictionary<string, string> _attributes = new();
+private readonly Dictionary<string, object?> _attributes = new();
 
-public IReadOnlyDictionary<string, string> Attributes => this._attributes;
+public IReadOnlyDictionary<string, object?> Attributes => this._attributes;
 
 public LLamaSharpEmbeddingGeneration(LLamaEmbedder embedder)
 {
@@ -17,7 +18,7 @@ public sealed class LLamaSharpEmbeddingGeneration : ITextEmbeddingGeneration
 }
 
 /// <inheritdoc/>
-public async Task<IList<ReadOnlyMemory<float>>> GenerateEmbeddingsAsync(IList<string> data, CancellationToken cancellationToken = default)
+public async Task<IList<ReadOnlyMemory<float>>> GenerateEmbeddingsAsync(IList<string> data, Kernel? kernel = null, CancellationToken cancellationToken = default)
 {
 var embeddings = data.Select(text => new ReadOnlyMemory<float>(_embedder.GetEmbeddings(text))).ToList();
 return await Task.FromResult(embeddings);

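The only behavioral change here is the interface rename and the optional Kernel parameter that SK 1.0 threads through service calls; callers can simply omit it:

    // Sketch: "embedder" is an existing LLamaEmbedder.
    var generation = new LLamaSharpEmbeddingGeneration(embedder);
    IList<ReadOnlyMemory<float>> vectors = await generation.GenerateEmbeddingsAsync(new[] { "hello" });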

LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs (+8 -8)

@@ -1,5 +1,5 @@
 using LLamaSharp.SemanticKernel.ChatCompletion;
-using Microsoft.SemanticKernel.AI;
+using Microsoft.SemanticKernel;
 
 namespace LLama.Unittest.SemanticKernel
 {
@@ -75,9 +75,9 @@ namespace LLama.Unittest.SemanticKernel
 public void ChatRequestSettings_FromAIRequestSettings()
 {
 // Arrange
-var originalRequestSettings = new AIRequestSettings()
+var originalRequestSettings = new PromptExecutionSettings()
 {
-ServiceId = "test",
+ModelId = "test",
 };
 
 // Act
@@ -85,16 +85,16 @@ namespace LLama.Unittest.SemanticKernel
 
 // Assert
 Assert.NotNull(requestSettings);
-Assert.Equal(originalRequestSettings.ServiceId, requestSettings.ServiceId);
+Assert.Equal(originalRequestSettings.ModelId, requestSettings.ModelId);
 }
 
 [Fact]
 public void ChatRequestSettings_FromAIRequestSettingsWithExtraPropertiesInSnakeCase()
 {
 // Arrange
-var originalRequestSettings = new AIRequestSettings()
+var originalRequestSettings = new PromptExecutionSettings()
 {
-ServiceId = "test",
+ModelId = "test",
 ExtensionData = new Dictionary<string, object>
 {
 { "frequency_penalty", 0.5 },
@@ -131,9 +131,9 @@ namespace LLama.Unittest.SemanticKernel
 public void ChatRequestSettings_FromAIRequestSettingsWithExtraPropertiesInPascalCase()
 {
 // Arrange
-var originalRequestSettings = new AIRequestSettings()
+var originalRequestSettings = new PromptExecutionSettings()
 {
-ServiceId = "test",
+ModelId = "test",
 ExtensionData = new Dictionary<string, object>
 {
 { "FrequencyPenalty", 0.5 },


LLama/LLamaEmbedder.cs (+3 -23)

@@ -20,30 +20,9 @@ namespace LLama
 public int EmbeddingSize => _ctx.EmbeddingSize;
 
 /// <summary>
-/// Create a new embedder (loading temporary weights)
+/// LLama Context
 /// </summary>
-/// <param name="allParams"></param>
-/// <param name="logger"></param>
-[Obsolete("Preload LLamaWeights and use the constructor which accepts them")]
-public LLamaEmbedder(ILLamaParams allParams, ILogger? logger = null)
-    : this(allParams, allParams, logger)
-{
-}
-
-/// <summary>
-/// Create a new embedder (loading temporary weights)
-/// </summary>
-/// <param name="modelParams"></param>
-/// <param name="contextParams"></param>
-/// <param name="logger"></param>
-[Obsolete("Preload LLamaWeights and use the constructor which accepts them")]
-public LLamaEmbedder(IModelParams modelParams, IContextParams contextParams, ILogger? logger = null)
-{
-using var weights = LLamaWeights.LoadFromFile(modelParams);
-
-contextParams.EmbeddingMode = true;
-_ctx = weights.CreateContext(contextParams, logger);
-}
+public LLamaContext Context => this._ctx;
 
 /// <summary>
 /// Create a new embedder, using the given LLamaWeights
@@ -117,5 +96,8 @@ namespace LLama
 {
 _ctx.Dispose();
 }
+
 }
 }

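Exposing Context publicly is what lets the Kernel Memory generators above implement CountTokens via embedder.Context.Tokenize. A sketch, with a placeholder model path:

    // Sketch: counting tokens through the embedder's now-public context.
    var parameters = new ModelParams("model.gguf"); // placeholder path
    using var weights = LLamaWeights.LoadFromFile(parameters);
    using var embedder = new LLamaEmbedder(weights, parameters);
    int count = embedder.Context.Tokenize("hello world").Length;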