diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs
index b1712ce5..d35e075e 100644
--- a/LLama/Abstractions/ILLamaExecutor.cs
+++ b/LLama/Abstractions/ILLamaExecutor.cs
@@ -15,6 +15,7 @@ namespace LLama.Abstractions
/// The loaded model for this executor.
///
public LLamaModel Model { get; }
+
///
/// Infers a response from the model.
///
@@ -24,6 +25,13 @@ namespace LLama.Abstractions
///
IEnumerable Infer(string text, InferenceParams? inferenceParams = null, CancellationToken token = default);
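+ /// <summary>
+ /// Asynchronously infers a response from the model.
+ /// </summary>
+ /// <param name="text">Your prompt</param>
+ /// <param name="inferenceParams">Any additional parameters</param>
+ /// <param name="token">A cancellation token.</param>
+ /// <returns>An asynchronous stream of the model's response.</returns>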
IAsyncEnumerable InferAsync(string text, InferenceParams? inferenceParams = null, CancellationToken token = default);
}
}
diff --git a/LLama/ChatSession.cs b/LLama/ChatSession.cs
index 26fad9d3..b87e8984 100644
--- a/LLama/ChatSession.cs
+++ b/LLama/ChatSession.cs
@@ -134,8 +134,9 @@ namespace LLama
///
/// Get the response from the LLama model with chat histories.
///
- ///
+ ///
///
+ ///
///
public IEnumerable Chat(ChatHistory history, InferenceParams? inferenceParams = null, CancellationToken cancellationToken = default)
{
@@ -156,6 +157,7 @@ namespace LLama
///
///
///
+ ///
///
public IEnumerable Chat(string prompt, InferenceParams? inferenceParams = null, CancellationToken cancellationToken = default)
{
@@ -176,8 +178,9 @@ namespace LLama
///
/// Get the response from the LLama model with chat histories.
///
- ///
+ ///
///
+ ///
///
public async IAsyncEnumerable ChatAsync(ChatHistory history, InferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
diff --git a/LLama/Common/ChatHistory.cs b/LLama/Common/ChatHistory.cs
index fecf9004..7224b314 100644
--- a/LLama/Common/ChatHistory.cs
+++ b/LLama/Common/ChatHistory.cs
@@ -1,16 +1,33 @@
-using System;
-using System.Collections.Generic;
-using System.Text;
+using System.Collections.Generic;
namespace LLama.Common
{
+ ///
+ /// Role of the message author, e.g. user/assistant/system
+ ///
public enum AuthorRole
{
+ ///
+ /// Role is unknown
+ ///
Unknown = -1,
+
+ ///
+ /// Message comes from a "system" prompt, not written by a user or language model
+ ///
System = 0,
+
+ ///
+ /// Message comes from the user
+ ///
User = 1,
+
+ ///
+ /// Message was generated by the language model
+ ///
Assistant = 2,
}
+
// copy from semantic-kernel
///
/// The chat history class
diff --git a/LLama/Common/FixedSizeQueue.cs b/LLama/Common/FixedSizeQueue.cs
index d74a670e..dd206829 100644
--- a/LLama/Common/FixedSizeQueue.cs
+++ b/LLama/Common/FixedSizeQueue.cs
@@ -24,7 +24,7 @@ namespace LLama.Common
}
///
- /// Fill the quene with the data. Please ensure that data.Count <= size
+ /// Fill the queue with the data. Please ensure that data.Count &lt;= size
///
///
///
diff --git a/LLama/Exceptions/RuntimeError.cs b/LLama/Exceptions/RuntimeError.cs
index 4d042d1d..789f035a 100644
--- a/LLama/Exceptions/RuntimeError.cs
+++ b/LLama/Exceptions/RuntimeError.cs
@@ -1,6 +1,4 @@
using System;
-using System.Collections.Generic;
-using System.Text;
namespace LLama.Exceptions
{
diff --git a/LLama/LLamaQuantizer.cs b/LLama/LLamaQuantizer.cs
index 2114d0be..c3ff5613 100644
--- a/LLama/LLamaQuantizer.cs
+++ b/LLama/LLamaQuantizer.cs
@@ -18,10 +18,12 @@ namespace LLama
/// The path to save the quantized model.
/// The type of quantization.
/// Thread to be used during the quantization. By default it's the physical core number.
+ ///
+ ///
/// Whether the quantization is successful.
///
public static unsafe bool Quantize(string srcFileName, string dstFilename, LLamaFtype ftype, int nthread = -1, bool allowRequantize = true,
- bool quantizeOutputTensor = false)
+ bool quantizeOutputTensor = false)
{
if (!ValidateFtype(ftype))
{
@@ -45,10 +47,12 @@ namespace LLama
/// The path to save the quantized model.
/// The type of quantization.
/// Thread to be used during the quantization. By default it's the physical core number.
+ ///
+ ///
/// Whether the quantization is successful.
///
public static bool Quantize(string srcFileName, string dstFilename, string ftype, int nthread = -1, bool allowRequantize = true,
- bool quantizeOutputTensor = false)
+ bool quantizeOutputTensor = false)
{
return Quantize(srcFileName, dstFilename, StringToFtype(ftype), nthread, allowRequantize, quantizeOutputTensor);
}
diff --git a/LLama/LLamaTransforms.cs b/LLama/LLamaTransforms.cs
index 14e9ccb9..958f09ff 100644
--- a/LLama/LLamaTransforms.cs
+++ b/LLama/LLamaTransforms.cs
@@ -159,8 +159,8 @@ namespace LLama
/// Keywords that you want to remove from the response.
/// The extra length when searching for the keyword. For example, if your only keyword is "highlight",
/// maybe the token you get is "\r\nhighligt". In this condition, if redundancyLength=0, the token cannot be successfully matched because the length of "\r\nhighligt" (10)
- /// has already exceeded the maximum length of the keywords (8). On the contrary, setting redundancyLengyh >= 2 leads to successful match.
- /// The larger the redundancyLength is, the lower the processing speed. But as an experience, it won't introduce too much performance impact when redundancyLength <= 5
+ /// has already exceeded the maximum length of the keywords (8). On the contrary, setting redundancyLength &gt;= 2 leads to successful match.
+ /// The larger the redundancyLength is, the lower the processing speed. In practice, though, it won't introduce too much performance impact when redundancyLength &lt;= 5
/// If set to true, when getting a matched keyword, all the related tokens will be removed. Otherwise only the part of keyword will be removed.
public KeywordTextOutputStreamTransform(IEnumerable keywords, int redundancyLength = 3, bool removeAllMatchedTokens = false)
{
diff --git a/LLama/Native/LLamaModelQuantizeParams.cs b/LLama/Native/LLamaModelQuantizeParams.cs
index ebbfb1de..17ec035a 100644
--- a/LLama/Native/LLamaModelQuantizeParams.cs
+++ b/LLama/Native/LLamaModelQuantizeParams.cs
@@ -8,7 +8,7 @@ namespace LLama.Native
public struct LLamaModelQuantizeParams
{
///
- /// number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
+ /// number of threads to use for quantizing, if &lt;=0 will use std::thread::hardware_concurrency()
///
public int nthread;
///
diff --git a/LLama/Native/NativeApi.Quantize.cs b/LLama/Native/NativeApi.Quantize.cs
index c1eed4e4..8b201dde 100644
--- a/LLama/Native/NativeApi.Quantize.cs
+++ b/LLama/Native/NativeApi.Quantize.cs
@@ -12,8 +12,7 @@ namespace LLama.Native
///
///
///
- ///
- /// how many threads to use. If <=0, will use std::thread::hardware_concurrency(), else the number given
+ ///
/// not great API - very likely to change
/// Returns 0 on success
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
diff --git a/LLama/OldVersion/ChatSession.cs b/LLama/OldVersion/ChatSession.cs
index d6e9bfc6..1bf954fa 100644
--- a/LLama/OldVersion/ChatSession.cs
+++ b/LLama/OldVersion/ChatSession.cs
@@ -39,9 +39,9 @@ namespace LLama.OldVersion
}
///
- /// Set the keyword to split the return value of chat AI.
+ /// Set the keywords to split the return value of chat AI.
///
- ///
+ ///
///
public ChatSession WithAntiprompt(string[] antiprompt)
{
diff --git a/LLama/OldVersion/LLamaModel.cs b/LLama/OldVersion/LLamaModel.cs
index 41986954..46fc7e63 100644
--- a/LLama/OldVersion/LLamaModel.cs
+++ b/LLama/OldVersion/LLamaModel.cs
@@ -796,6 +796,7 @@ namespace LLama.OldVersion
}
}
+ ///
public void Dispose()
{
_ctx.Dispose();