diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs index c2b08305..ea5616b5 100644 --- a/LLama/LLamaExecutorBase.cs +++ b/LLama/LLamaExecutorBase.cs @@ -317,11 +317,11 @@ namespace LLama /// /// Asynchronously runs a prompt through the model to compute KV cache without generating any new tokens. + /// This can reduce the latency of the first response when the user's first input is not provided immediately. /// /// Prompt to process - /// A cancellation token /// - public virtual async Task AddPromptAsync(string prompt, CancellationToken cancellationToken = default) + public virtual async Task PrefillPromptAsync(string prompt) { var inferenceParams = new InferenceParams {