From 6a7e74e71b15ce585aea06a35674da01ff81d84c Mon Sep 17 00:00:00 2001
From: Yaohui Liu
Date: Sat, 4 Nov 2023 22:38:06 +0800
Subject: [PATCH 1/6] build: add package for kernel-memory integration.

---
 .../LLamaSharp.KernelMemory.csproj   | 23 ++++++++++++++++++-
 .../LLamaSharp.SemanticKernel.csproj |  6 ++---
 README.md                            |  9 +++++++-
 3 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj b/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj
index 54766b02..de5f42a5 100644
--- a/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj
+++ b/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj
@@ -1,9 +1,30 @@
 <Project Sdk="Microsoft.NET.Sdk">
 
   <PropertyGroup>
-    <TargetFramework>net6.0</TargetFramework>
+    <TargetFrameworks>netstandard2.0;net6.0;net7.0</TargetFrameworks>
     <ImplicitUsings>enable</ImplicitUsings>
     <Nullable>enable</Nullable>
+
+    <Version>0.7.1</Version>
+    <Authors>Xbotter</Authors>
+    <Company>SciSharp STACK</Company>
+    <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
+    <Copyright>MIT, SciSharp STACK $([System.DateTime]::UtcNow.ToString(yyyy))</Copyright>
+    <RepositoryUrl>https://github.com/SciSharp/LLamaSharp</RepositoryUrl>
+    <RepositoryType>git</RepositoryType>
+    <PackageIconUrl>https://avatars3.githubusercontent.com/u/44989469?s=200&v=4</PackageIconUrl>
+    <PackageTags>LLama, LLM, GPT, ChatGPT, kernel-memory, vector search, SciSharp</PackageTags>
+    <Description>
+      The integration of LLamaSharp and Microsoft kernel-memory. It makes it easy to support document search for LLamaSharp model inference.
+    </Description>
+    <PackageReleaseNotes>
+      Support integration with kernel-memory
+    </PackageReleaseNotes>
+    <PackageLicenseExpression>MIT</PackageLicenseExpression>
+    <PackageOutputPath>packages</PackageOutputPath>
+    <Platforms>AnyCPU;x64;Arm64</Platforms>
+    <PackageId>LLamaSharp.kernel-memory</PackageId>
+    <Configurations>Debug;Release;GPU</Configurations>
   </PropertyGroup>

diff --git a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
index 77596d57..c6ece4e7 100644
--- a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
+++ b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
@@ -10,8 +10,8 @@
     <ImplicitUsings>enable</ImplicitUsings>
     <Nullable>enable</Nullable>
 
-    <Version>0.6.2-beta1</Version>
-    <Authors>Tim Miller</Authors>
+    <Version>0.7.1</Version>
+    <Authors>Tim Miller, Xbotter</Authors>
     <Company>SciSharp STACK</Company>
     <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
     <Copyright>MIT, SciSharp STACK $([System.DateTime]::UtcNow.ToString(yyyy))</Copyright>
     <RepositoryUrl>https://github.com/SciSharp/LLamaSharp</RepositoryUrl>
     <RepositoryType>git</RepositoryType>
@@ -20,7 +20,7 @@
     <PackageIconUrl>https://avatars3.githubusercontent.com/u/44989469?s=200&v=4</PackageIconUrl>
     <PackageTags>LLama, LLM, GPT, ChatGPT, semantic-kernel, SciSharp</PackageTags>
     <Description>
-      The integration of LLamaSharp ans semantic-kernel.
+      The integration of LLamaSharp and Microsoft semantic-kernel.
     </Description>
     <PackageReleaseNotes>
       Support integration with semantic-kernel

diff --git a/README.md b/README.md
index c3b73f8c..d116d1a4 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,13 @@ For [microsoft semantic-kernel](https://github.com/microsoft/semantic-kernel) in
 LLamaSharp.semantic-kernel
 ```
 
+For [microsoft kernel-memory](https://github.com/microsoft/kernel-memory) integration, please search and install the following package:
+
+```
+LLamaSharp.kernel-memory
+```
+
+
 ### Tips for choosing a version
 
 In general, there may be some breaking changes between two minor releases, for example 0.5.1 and 0.6.0. On the contrary, we don't introduce API breaking changes in patch releases. Therefore, it's recommended to keep the highest patch version of a minor release. For example, keep 0.5.6 instead of 0.5.3.
@@ -196,7 +203,7 @@ Another choice is generate gguf format file yourself with a pytorch weight (or a
 🔳 Fine-tune
 
-⚠️ Local document search (enabled by kernel-memory now)
+✅ Local document search (enabled by kernel-memory now)
 
 🔳 MAUI Integration
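For context on what the new LLamaSharp.kernel-memory package enables, here is a minimal usage sketch (not part of the commit). The `WithLLamaSharpDefaults` extension and `LLamaSharpConfig` type are assumed from this version of the package, and the builder/answer API follows Microsoft.KernelMemory 0.x; names may differ in other releases.

```csharp
// Sketch: wiring LLamaSharp into kernel-memory for local document Q&A.
// Assumptions: WithLLamaSharpDefaults and LLamaSharpConfig come from the
// LLamaSharp.kernel-memory package added here; KernelMemoryBuilder,
// ImportDocumentAsync and AskAsync follow Microsoft.KernelMemory 0.x.
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory;

var memory = new KernelMemoryBuilder()
    // Registers LLamaSharp as both text generator and embedding generator.
    .WithLLamaSharpDefaults(new LLamaSharpConfig("path/to/model.gguf"))
    .Build();

// Index a local document, then ask a question grounded in it.
await memory.ImportDocumentAsync("sample.pdf");
var answer = await memory.AskAsync("What is this document about?");
Console.WriteLine(answer.Result);
```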
From 0f12566f654f430f50480128ae65e5f588f6dc45 Mon Sep 17 00:00:00 2001
From: Yaohui Liu
Date: Sun, 5 Nov 2023 02:55:41 +0800
Subject: [PATCH 2/6] build: use only net6.0 with kernel-memory.

---
 LLama.KernelMemory/LLamaSharp.KernelMemory.csproj | 2 +-
 README.md                                         | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj b/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj
index de5f42a5..7fd99e2c 100644
--- a/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj
+++ b/LLama.KernelMemory/LLamaSharp.KernelMemory.csproj
@@ -1,7 +1,7 @@
 <Project Sdk="Microsoft.NET.Sdk">
 
   <PropertyGroup>
-    <TargetFrameworks>netstandard2.0;net6.0;net7.0</TargetFrameworks>
+    <TargetFramework>net6.0</TargetFramework>
     <ImplicitUsings>enable</ImplicitUsings>
     <Nullable>enable</Nullable>
 
diff --git a/README.md b/README.md
index d116d1a4..96c9883a 100644
--- a/README.md
+++ b/README.md
@@ -54,13 +54,12 @@ For [microsoft semantic-kernel](https://github.com/microsoft/semantic-kernel) in
 LLamaSharp.semantic-kernel
 ```
 
-For [microsoft kernel-memory](https://github.com/microsoft/kernel-memory) integration, please search and install the following package:
+For [microsoft kernel-memory](https://github.com/microsoft/kernel-memory) integration, please search and install the following package (currently kernel-memory only supports net6.0):
 
 ```
 LLamaSharp.kernel-memory
 ```
 
-
 ### Tips for choosing a version
 
 In general, there may be some breaking changes between two minor releases, for example 0.5.1 and 0.6.0. On the contrary, we don't introduce API breaking changes in patch releases. Therefore, it's recommended to keep the highest patch version of a minor release. For example, keep 0.5.6 instead of 0.5.3.

From 457958435b07603e9f41dad62af2ca60e621bae1 Mon Sep 17 00:00:00 2001
From: Yaohui Liu
Date: Sun, 5 Nov 2023 02:59:41 +0800
Subject: [PATCH 3/6] build: use semantic-kernel beta1 in examples.

---
 LLama.Examples/LLama.Examples.csproj | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj
index 3ecacdfe..c1761829 100644
--- a/LLama.Examples/LLama.Examples.csproj
+++ b/LLama.Examples/LLama.Examples.csproj
@@ -29,7 +29,7 @@
-
+

From a288e7c02bf0cc7b74aaca989e6dbd3913040db5 Mon Sep 17 00:00:00 2001
From: Philipp Bauer
Date: Mon, 6 Nov 2023 18:20:07 -0600
Subject: [PATCH 4/6] Prevent duplication of user prompts / chat history in
 ChatSession.

The way ChatSession.ChatAsync used the methods provided by an IHistoryTransform implementation created unexpected duplication of the chat history messages. It also prevented loading previous history into the current session.

---
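For reviewers, a small sketch of the duplication this patch removes, using the DefaultHistoryTransform that ships with LLamaSharp (illustrative only, not part of the commit):

```csharp
// Before this patch, ChatAsync(string) stored the user prompt by re-parsing
// it through IHistoryTransform.TextToHistory. With a transform that renders
// roles into the prompt text, already-rendered transcript text was wrapped
// into a brand-new user message, so history content was duplicated per turn.
using LLama;
using LLama.Common;

var transform = new LLamaTransforms.DefaultHistoryTransform();

var history = new ChatHistory();
history.Messages.Add(new ChatHistory.Message(AuthorRole.User, "Hello!"));

// Old flow (simplified): render the whole history into prompt text...
string prompt = transform.HistoryToText(history);

// ...then append that rendered text back into history as a "user" message.
history.Messages.AddRange(transform.TextToHistory(AuthorRole.User, prompt).Messages);

// history now holds the original message plus a second message that embeds
// the rendered transcript - the duplication ChatAsync now avoids by adding
// the raw prompt via History.Messages.Add(...).
```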
 LLama/ChatSession.cs | 50 ++++++++++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/LLama/ChatSession.cs b/LLama/ChatSession.cs
index 457e7e48..358d70c3 100644
--- a/LLama/ChatSession.cs
+++ b/LLama/ChatSession.cs
@@ -95,11 +95,11 @@ namespace LLama
                 Directory.CreateDirectory(path);
             }
             _executor.Context.SaveState(Path.Combine(path, _modelStateFilename));
-            if(Executor is StatelessExecutor)
+            if (Executor is StatelessExecutor)
             {
 
             }
-            else if(Executor is StatefulExecutorBase statefulExecutor)
+            else if (Executor is StatefulExecutorBase statefulExecutor)
             {
                 statefulExecutor.SaveState(Path.Combine(path, _executorStateFilename));
             }
@@ -135,30 +135,54 @@ namespace LLama
         }
 
         /// <summary>
-        /// Get the response from the LLama model. Note that prompt could not only be the preset words,
-        /// but also the question you want to ask.
+        /// Generates a response for a given user prompt and manages history state for the user.
+        /// This will always pass the whole history to the model. Don't pass a whole history
+        /// to this method as the user prompt will be appended to the history of the current session.
+        /// If more control is needed, use the other overload of this method that accepts a ChatHistory object.
         /// </summary>
         /// <param name="prompt"></param>
         /// <param name="inferenceParams"></param>
         /// <param name="cancellationToken"></param>
-        /// <returns></returns>
+        /// <returns>Returns generated tokens of the assistant message.</returns>
         public async IAsyncEnumerable<string> ChatAsync(string prompt, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
         {
-            foreach(var inputTransform in InputTransformPipeline)
+            foreach (var inputTransform in InputTransformPipeline)
                 prompt = inputTransform.Transform(prompt);
-
-            History.Messages.AddRange(HistoryTransform.TextToHistory(AuthorRole.User, prompt).Messages);
+
+            History.Messages.Add(new ChatHistory.Message(AuthorRole.User, prompt));
+
+            string internalPrompt = HistoryTransform.HistoryToText(History);
+
             StringBuilder sb = new();
-            await foreach (var result in ChatAsyncInternal(prompt, inferenceParams, cancellationToken))
+
+            await foreach (var result in ChatAsyncInternal(internalPrompt, inferenceParams, cancellationToken))
             {
                 yield return result;
                 sb.Append(result);
             }
-            History.Messages.AddRange(HistoryTransform.TextToHistory(AuthorRole.Assistant, sb.ToString()).Messages);
+
+            string assistantMessage = sb.ToString();
+
+            // Remove end tokens from the assistant message
+            // if defined in inferenceParams.AntiPrompts.
+            // We only want the response that was generated and not tokens
+            // that are delimiting the beginning or end of the response.
+            if (inferenceParams?.AntiPrompts != null)
+            {
+                foreach (var stopToken in inferenceParams.AntiPrompts)
+                {
+                    assistantMessage = assistantMessage.Replace(stopToken, "");
+                }
+            }
+
+            History.Messages.Add(new ChatHistory.Message(AuthorRole.Assistant, assistantMessage));
         }
 
         /// <summary>
-        /// Get the response from the LLama model with chat histories.
+        /// Generates a response for a given chat history. This method does not manage history state for the user.
+        /// If you want to e.g. truncate the history of a session to fit into the model's context window,
+        /// use this method and pass the truncated history to it. If you don't need this control, use the other
+        /// overload of this method that accepts a user prompt instead.
         /// </summary>
         /// <param name="history"></param>
         /// <param name="inferenceParams"></param>
         /// <param name="cancellationToken"></param>
         /// <returns></returns>
         public async IAsyncEnumerable<string> ChatAsync(ChatHistory history, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
         {
             var prompt = HistoryTransform.HistoryToText(history);
-            History.Messages.AddRange(HistoryTransform.TextToHistory(AuthorRole.User, prompt).Messages);
+
             StringBuilder sb = new();
+
             await foreach (var result in ChatAsyncInternal(prompt, inferenceParams, cancellationToken))
             {
                 yield return result;
                 sb.Append(result);
             }
-            History.Messages.AddRange(HistoryTransform.TextToHistory(AuthorRole.Assistant, sb.ToString()).Messages);
         }
 
         private async IAsyncEnumerable<string> ChatAsyncInternal(string prompt, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
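Following the patch, a minimal usage sketch of the reworked string overload (the model path and parameter values are placeholders; loading APIs are as in LLamaSharp of this era):

```csharp
// Sketch: the string overload now manages history itself - the prompt is
// appended as a user message, and the generated response (with AntiPrompts
// stripped) is stored back as an assistant message.
using LLama;
using LLama.Common;

var parameters = new ModelParams("path/to/model.gguf");
using var model = LLamaWeights.LoadFromFile(parameters);
using var context = model.CreateContext(parameters);
var session = new ChatSession(new InteractiveExecutor(context));

var inferenceParams = new InferenceParams
{
    AntiPrompts = new List<string> { "User:" }
};

await foreach (var token in session.ChatAsync("Hello, Bob!", inferenceParams))
{
    Console.Write(token);
}
```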
From 6ea40d15461a5243bfdd453bd6d2ede3cdaa5eaa Mon Sep 17 00:00:00 2001
From: Philipp Bauer
Date: Wed, 8 Nov 2023 13:18:32 -0600
Subject: [PATCH 5/6] Use full history only when the ChatSession runs the
 first time

---
 LLama/ChatSession.cs | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/LLama/ChatSession.cs b/LLama/ChatSession.cs
index 358d70c3..68c3c093 100644
--- a/LLama/ChatSession.cs
+++ b/LLama/ChatSession.cs
@@ -1,11 +1,14 @@
 using LLama.Abstractions;
 using LLama.Common;
+using System;
 using System.Collections.Generic;
 using System.IO;
+using System.Linq;
 using System.Runtime.CompilerServices;
 using System.Text;
 using System.Threading;
 using System.Threading.Tasks;
+using static LLama.InteractiveExecutor;
 
 namespace LLama
 {
@@ -151,11 +154,17 @@ namespace LLama
 
             History.Messages.Add(new ChatHistory.Message(AuthorRole.User, prompt));
 
-            string internalPrompt = HistoryTransform.HistoryToText(History);
+            if (_executor is InteractiveExecutor executor)
+            {
+                InteractiveExecutorState state = (InteractiveExecutorState)executor.GetStateData();
+
+                prompt = state.IsPromptRun
+                    ? HistoryTransform.HistoryToText(History)
+                    : prompt;
+            }
 
             StringBuilder sb = new();
 
-            await foreach (var result in ChatAsyncInternal(internalPrompt, inferenceParams, cancellationToken))
+            await foreach (var result in ChatAsyncInternal(prompt, inferenceParams, cancellationToken))
             {
                 yield return result;
                 sb.Append(result);
@@ -190,14 +199,28 @@ namespace LLama
         /// <returns></returns>
         public async IAsyncEnumerable<string> ChatAsync(ChatHistory history, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
         {
-            var prompt = HistoryTransform.HistoryToText(history);
+            if (history.Messages.Count == 0)
+            {
+                throw new ArgumentException("History must contain at least one message.");
+            }
 
-            StringBuilder sb = new();
+            string prompt;
+            if (_executor is InteractiveExecutor executor)
+            {
+                InteractiveExecutorState state = (InteractiveExecutorState)executor.GetStateData();
+
+                prompt = state.IsPromptRun
+                    ? HistoryTransform.HistoryToText(History)
+                    : history.Messages.Last().Content;
+            }
+            else
+            {
+                prompt = history.Messages.Last().Content;
+            }
 
             await foreach (var result in ChatAsyncInternal(prompt, inferenceParams, cancellationToken))
             {
                 yield return result;
-                sb.Append(result);
             }
         }
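With this change, only the session's first run renders the full transcript (while InteractiveExecutorState.IsPromptRun is true); afterwards just the newest message is forwarded to the model. A sketch of the truncation use case the doc comment describes, continuing the session from the previous example (illustrative only):

```csharp
// Sketch: keep only the most recent messages when the transcript grows long,
// then continue the chat from the truncated history. Past the executor's
// first run, only the last message of the passed history reaches the model,
// so the full transcript is not re-sent on every turn.
using System.Linq;

var truncated = new ChatHistory();
truncated.Messages.AddRange(
    session.History.Messages.Skip(Math.Max(0, session.History.Messages.Count - 6)));
truncated.Messages.Add(new ChatHistory.Message(AuthorRole.User, "And what about net6.0 support?"));

await foreach (var token in session.ChatAsync(truncated, inferenceParams))
{
    Console.Write(token);
}
```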
From d2b544afb8225600ff9b4d07112315d8eddbffd7 Mon Sep 17 00:00:00 2001
From: Philipp Bauer
Date: Wed, 8 Nov 2023 13:23:21 -0600
Subject: [PATCH 6/6] Improved method return description

---
 LLama/ChatSession.cs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/LLama/ChatSession.cs b/LLama/ChatSession.cs
index 68c3c093..7ee99590 100644
--- a/LLama/ChatSession.cs
+++ b/LLama/ChatSession.cs
@@ -146,7 +146,7 @@ namespace LLama
         /// <param name="prompt"></param>
         /// <param name="inferenceParams"></param>
         /// <param name="cancellationToken"></param>
-        /// <returns>Returns generated tokens of the assistant message.</returns>
+        /// <returns>Returns generated text of the assistant message.</returns>
         public async IAsyncEnumerable<string> ChatAsync(string prompt, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
         {
             foreach (var inputTransform in InputTransformPipeline)
@@ -196,7 +196,7 @@ namespace LLama
         /// <param name="history"></param>
         /// <param name="inferenceParams"></param>
         /// <param name="cancellationToken"></param>
-        /// <returns></returns>
+        /// <returns>Returns generated text of the assistant message.</returns>
         public async IAsyncEnumerable<string> ChatAsync(ChatHistory history, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
         {
             if (history.Messages.Count == 0)