diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs index f7bf2494..507f041b 100644 --- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs +++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs @@ -2,6 +2,7 @@ using LLama.Batched; using LLama.Common; using Spectre.Console; +using LLama.Abstractions; namespace LLama.Examples.Examples { @@ -99,7 +100,10 @@ namespace LLama.Examples.Examples // Initilize Images in executor // - ex.ImagePaths = imagePaths.ToList(); + foreach (var image in imagePaths) + { + ex.Images.Add(File.ReadAllBytes(image)); + } } Console.ForegroundColor = Color.White; diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs index ee4cf512..d6c8d2ce 100644 --- a/LLama/Abstractions/ILLamaExecutor.cs +++ b/LLama/Abstractions/ILLamaExecutor.cs @@ -22,14 +22,13 @@ namespace LLama.Abstractions /// /// Muti-Modal Projections / Clip Model weights /// - public LLavaWeights? ClipModel { get; } - + public LLavaWeights? ClipModel { get; } + /// - /// List of images: Image filename and path (jpeg images). + /// List of images: Image filen path, uri or image byte array. See ImageData. /// - public List ImagePaths { get; set; } - - + public List Images { get; } + /// /// Asynchronously infers a response from the model. /// diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs index 52b38e18..65c0dcb4 100644 --- a/LLama/LLamaExecutorBase.cs +++ b/LLama/LLamaExecutorBase.cs @@ -76,11 +76,11 @@ namespace LLama } /// - public LLavaWeights? ClipModel { get; } - + public LLavaWeights? ClipModel { get; } + /// - public List ImagePaths { get; set; } - + public List Images { get; set; } + /// /// Current "mu" value for mirostat sampling /// @@ -95,7 +95,7 @@ namespace LLama /// protected StatefulExecutorBase(LLamaContext context, ILogger? logger = null) { - ImagePaths = new List(); + Images = new List(); _logger = logger; Context = context; _pastTokensCount = 0; @@ -105,6 +105,12 @@ namespace LLama _decoder = new StreamingTokenDecoder(context); } + /// + /// + /// + /// + /// + /// public StatefulExecutorBase(LLamaContext context, LLavaWeights lLavaWeights, ILogger? logger = null) : this( context, logger ) { diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs index 21bb8dcc..a87a0f37 100644 --- a/LLama/LLamaInteractExecutor.cs +++ b/LLama/LLamaInteractExecutor.cs @@ -11,6 +11,7 @@ using System.Threading.Tasks; using LLama.Exceptions; using LLama.Extensions; using Microsoft.Extensions.Logging; +using System.Net.Http; namespace LLama { @@ -148,13 +149,13 @@ namespace LLama int usedTokens = 0; // If the prompt contains the tag extract this. _imageInPrompt = text.Contains(""); - if (_imageInPrompt) + if (_imageInPrompt && ClipModel != null) { - foreach (var image in ImagePaths) + foreach (var image in Images) { - _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromFileName( ClipModel.NativeHandle, Context, image ) ); + _imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromMemory(ClipModel.NativeHandle, Context, image)); } - + int imageIndex = text.IndexOf(""); // Tokenize segment 1 (before tag) string preImagePrompt = text.Substring(0, imageIndex); diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs index 9d705af1..f9d6ca5b 100644 --- a/LLama/LLamaStatelessExecutor.cs +++ b/LLama/LLamaStatelessExecutor.cs @@ -26,10 +26,16 @@ namespace LLama // LLava Section public bool IsMultiModal => false; + + /// public bool MultiModalProject { get; } - public LLavaWeights? ClipModel { get; } - public List ImagePaths { get; set; } - + + /// + public LLavaWeights? ClipModel { get; } + + /// + public List Images { get; set; } + /// /// The context used by the executor when running the inference. /// @@ -43,7 +49,7 @@ namespace LLama /// public StatelessExecutor(LLamaWeights weights, IContextParams @params, ILogger? logger = null) { - ImagePaths = new List(); + Images = new List(); _weights = weights; _params = @params; _logger = logger;