|
|
|
@@ -1,13 +1,6 @@ |
|
|
|
using LLama; |
|
|
|
using Microsoft.SemanticKernel.AI.ChatCompletion; |
|
|
|
using System; |
|
|
|
using System.Collections.Generic; |
|
|
|
using System.IO; |
|
|
|
using System.Linq; |
|
|
|
using System.Runtime.CompilerServices; |
|
|
|
using System.Text; |
|
|
|
using System.Threading; |
|
|
|
using System.Threading.Tasks; |
|
|
|
|
|
|
|
namespace LLamaSharp.SemanticKernel.ChatCompletion; |
|
|
|
|
|
|
|
@@ -19,12 +12,32 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion |
|
|
|
private const string UserRole = "user:"; |
|
|
|
private const string AssistantRole = "assistant:"; |
|
|
|
private ChatSession session; |
|
|
|
private ChatRequestSettings defaultRequestSettings; |
|
|
|
|
|
|
|
public LLamaSharpChatCompletion(InteractiveExecutor model) |
|
|
|
public LLamaSharpChatCompletion(InteractiveExecutor model, ChatRequestSettings? defaultRequestSettings = default) |
|
|
|
{ |
|
|
|
this.session = new ChatSession(model) |
|
|
|
.WithHistoryTransform(new HistoryTransform()) |
|
|
|
.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole })); |
|
|
|
this.defaultRequestSettings = defaultRequestSettings ??= new ChatRequestSettings() |
|
|
|
{ |
|
|
|
MaxTokens = 256, |
|
|
|
Temperature = 0, |
|
|
|
TopP = 0, |
|
|
|
StopSequences = new List<string> { } |
|
|
|
}; |
|
|
|
} |
|
|
|
|
|
|
|
public LLamaSharpChatCompletion(ChatSession session, ChatRequestSettings? defaultRequestSettings = default) |
|
|
|
{ |
|
|
|
this.session = session; |
|
|
|
this.defaultRequestSettings = defaultRequestSettings ??= new ChatRequestSettings() |
|
|
|
{ |
|
|
|
MaxTokens = 256, |
|
|
|
Temperature = 0, |
|
|
|
TopP = 0, |
|
|
|
StopSequences = new List<string> { } |
|
|
|
}; |
|
|
|
} |
|
|
|
|
|
|
|
/// <inheritdoc/> |
|
|
|
@@ -43,13 +56,7 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion |
|
|
|
/// <inheritdoc/> |
|
|
|
public async Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, CancellationToken cancellationToken = default) |
|
|
|
{ |
|
|
|
requestSettings ??= new ChatRequestSettings() |
|
|
|
{ |
|
|
|
MaxTokens = 256, |
|
|
|
Temperature = 0, |
|
|
|
TopP = 0, |
|
|
|
StopSequences = new List<string> { } |
|
|
|
}; |
|
|
|
requestSettings = requestSettings ?? this.defaultRequestSettings; |
|
|
|
|
|
|
|
var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken); |
|
|
|
|
|
|
|
@@ -59,13 +66,7 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion |
|
|
|
/// <inheritdoc/> |
|
|
|
public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) |
|
|
|
{ |
|
|
|
requestSettings ??= new ChatRequestSettings() |
|
|
|
{ |
|
|
|
MaxTokens = 256, |
|
|
|
Temperature = 0, |
|
|
|
TopP = 0, |
|
|
|
StopSequences = new List<string> { } |
|
|
|
}; |
|
|
|
requestSettings = requestSettings ?? this.defaultRequestSettings; |
|
|
|
|
|
|
|
var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken); |
|
|
|
|
|
|
|
|