Browse Source

Merge pull request #653 from zsogitbe/master

Extension LLava with in memory images
pull/664/head
jlsantiago GitHub 2 years ago
parent
commit
8dd9101f8d
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
5 changed files with 36 additions and 20 deletions
  1. +5
    -1
      LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
  2. +5
    -6
      LLama/Abstractions/ILLamaExecutor.cs
  3. +11
    -5
      LLama/LLamaExecutorBase.cs
  4. +5
    -4
      LLama/LLamaInteractExecutor.cs
  5. +10
    -4
      LLama/LLamaStatelessExecutor.cs

+ 5
- 1
LLama.Examples/Examples/LlavaInteractiveModeExecute.cs View File

@@ -2,6 +2,7 @@
using LLama.Batched;
using LLama.Common;
using Spectre.Console;
using LLama.Abstractions;

namespace LLama.Examples.Examples
{
@@ -99,7 +100,10 @@ namespace LLama.Examples.Examples

// Initialize images in executor
//
ex.ImagePaths = imagePaths.ToList();
foreach (var image in imagePaths)
{
ex.Images.Add(File.ReadAllBytes(image));
}
}

Console.ForegroundColor = Color.White;


+ 5
- 6
LLama/Abstractions/ILLamaExecutor.cs View File

@@ -22,14 +22,13 @@ namespace LLama.Abstractions
/// <summary>
/// Multi-Modal Projections / Clip Model weights
/// </summary>
public LLavaWeights? ClipModel { get; }
public LLavaWeights? ClipModel { get; }
/// <summary>
/// List of images: Image filename and path (jpeg images).
/// List of images: image file path, URI, or image byte array. See ImageData.
/// </summary>
public List<string> ImagePaths { get; set; }
public List<byte[]> Images { get; }

/// <summary>
/// Asynchronously infers a response from the model.
/// </summary>


+ 11
- 5
LLama/LLamaExecutorBase.cs View File

@@ -76,11 +76,11 @@ namespace LLama
}
/// <inheritdoc />
public LLavaWeights? ClipModel { get; }
public LLavaWeights? ClipModel { get; }
/// <inheritdoc />
public List<string> ImagePaths { get; set; }
public List<byte[]> Images { get; set; }
/// <summary>
/// Current "mu" value for mirostat sampling
/// </summary>
@@ -95,7 +95,7 @@ namespace LLama
/// <param name="logger"></param>
protected StatefulExecutorBase(LLamaContext context, ILogger? logger = null)
{
ImagePaths = new List<string>();
Images = new List<byte[]>();
_logger = logger;
Context = context;
_pastTokensCount = 0;
@@ -105,6 +105,12 @@ namespace LLama
_decoder = new StreamingTokenDecoder(context);
}
/// <summary>
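/// Creates the executor from a context and LLava weights; chains to the (context, logger) constructor.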
/// </summary>
/// <param name="context"></param>
/// <param name="lLavaWeights"></param>
/// <param name="logger"></param>
public StatefulExecutorBase(LLamaContext context, LLavaWeights lLavaWeights, ILogger? logger = null) :
this( context, logger )
{


+ 5
- 4
LLama/LLamaInteractExecutor.cs View File

@@ -11,6 +11,7 @@ using System.Threading.Tasks;
using LLama.Exceptions;
using LLama.Extensions;
using Microsoft.Extensions.Logging;
using System.Net.Http;

namespace LLama
{
@@ -148,13 +149,13 @@ namespace LLama
int usedTokens = 0;
// If the prompt contains the tag <image> extract this.
_imageInPrompt = text.Contains("<image>");
if (_imageInPrompt)
if (_imageInPrompt && ClipModel != null)
{
foreach (var image in ImagePaths)
foreach (var image in Images)
{
_imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromFileName( ClipModel.NativeHandle, Context, image ) );
_imageEmbedHandles.Add(SafeLlavaImageEmbedHandle.CreateFromMemory(ClipModel.NativeHandle, Context, image));
}
int imageIndex = text.IndexOf("<image>");
// Tokenize segment 1 (before <image> tag)
string preImagePrompt = text.Substring(0, imageIndex);


+ 10
- 4
LLama/LLamaStatelessExecutor.cs View File

@@ -26,10 +26,16 @@ namespace LLama
// LLava Section
public bool IsMultiModal => false;

/// <inheritdoc />
public bool MultiModalProject { get; }
public LLavaWeights? ClipModel { get; }
public List<string> ImagePaths { get; set; }

/// <inheritdoc />
public LLavaWeights? ClipModel { get; }

/// <inheritdoc />
public List<byte[]> Images { get; set; }

/// <summary>
/// The context used by the executor when running the inference.
/// </summary>
@@ -43,7 +49,7 @@ namespace LLama
/// <param name="logger"></param>
public StatelessExecutor(LLamaWeights weights, IContextParams @params, ILogger? logger = null)
{
ImagePaths = new List<string>();
Images = new List<byte[]>();
_weights = weights;
_params = @params;
_logger = logger;


Loading…
Cancel
Save