From dced651f8bd4f6feec60e5d8efe8b0c6731e821d Mon Sep 17 00:00:00 2001 From: Tim Miller Date: Mon, 11 Sep 2023 19:21:51 +0900 Subject: [PATCH 1/2] Allow setting ChatRequestSettings Defaults and ChatSession --- .../LLamaSharpChatCompletion.cs | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs index 7fda3d4f..4571aec8 100644 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs +++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs @@ -1,13 +1,6 @@ using LLama; using Microsoft.SemanticKernel.AI.ChatCompletion; -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; using System.Runtime.CompilerServices; -using System.Text; -using System.Threading; -using System.Threading.Tasks; namespace LLamaSharp.SemanticKernel.ChatCompletion; @@ -19,12 +12,32 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion private const string UserRole = "user:"; private const string AssistantRole = "assistant:"; private ChatSession session; + private ChatRequestSettings defaultRequestSettings; - public LLamaSharpChatCompletion(InteractiveExecutor model) + public LLamaSharpChatCompletion(InteractiveExecutor model, ChatRequestSettings? defaultRequestSettings = default) { this.session = new ChatSession(model) .WithHistoryTransform(new HistoryTransform()) .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole })); + this.defaultRequestSettings = defaultRequestSettings ??= new ChatRequestSettings() + { + MaxTokens = 256, + Temperature = 0, + TopP = 0, + StopSequences = new List { } + }; + } + + public LLamaSharpChatCompletion(ChatSession session, ChatRequestSettings? defaultRequestSettings = default) + { + this.session = session; + this.defaultRequestSettings = defaultRequestSettings ??= new ChatRequestSettings() + { + MaxTokens = 256, + Temperature = 0, + TopP = 0, + StopSequences = new List { } + }; } /// @@ -43,13 +56,7 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion /// public async Task> GetChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, CancellationToken cancellationToken = default) { - requestSettings ??= new ChatRequestSettings() - { - MaxTokens = 256, - Temperature = 0, - TopP = 0, - StopSequences = new List { } - }; + requestSettings = requestSettings ?? this.defaultRequestSettings; var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken); @@ -59,13 +66,7 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion /// public async IAsyncEnumerable GetStreamingChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - requestSettings ??= new ChatRequestSettings() - { - MaxTokens = 256, - Temperature = 0, - TopP = 0, - StopSequences = new List { } - }; + requestSettings = requestSettings ?? this.defaultRequestSettings; var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken); From abeab9f0a16571cb57cb8d6412d254ad8e02bdce Mon Sep 17 00:00:00 2001 From: Tim Miller Date: Tue, 12 Sep 2023 12:19:57 +0900 Subject: [PATCH 2/2] Bump dependencies --- LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs | 6 ++++++ LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs index ec479f42..2abea981 100644 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs +++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs @@ -1,4 +1,5 @@ using Microsoft.SemanticKernel.AI.ChatCompletion; +using Microsoft.SemanticKernel.Orchestration; using System.Runtime.CompilerServices; using System.Text; @@ -6,6 +7,7 @@ namespace LLamaSharp.SemanticKernel.ChatCompletion; internal sealed class LLamaSharpChatResult : IChatStreamingResult { + private readonly ModelResult _modelResult; private readonly IAsyncEnumerable _stream; /// @@ -15,7 +17,11 @@ internal sealed class LLamaSharpChatResult : IChatStreamingResult public LLamaSharpChatResult(IAsyncEnumerable stream) { _stream = stream; + this._modelResult = new ModelResult(stream); } + + public ModelResult ModelResult => this._modelResult; + /// public async Task GetChatMessageAsync(CancellationToken cancellationToken = default) { diff --git a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj index 8de65692..e7a7589d 100644 --- a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj +++ b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj @@ -33,7 +33,7 @@ - +