Browse Source

Merge branch 'SciSharp:master' into feature/interactive-sk-chatcompletion

pull/671/head
Chirag Karia GitHub 2 years ago
parent
commit
05937de5dc
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
18 changed files with 106 additions and 105 deletions
  1. +6
    -4
      .github/workflows/main.yml
  2. +1
    -1
      LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs
  3. +1
    -7
      LLama.KernelMemory/LlamaSharpTextGenerator.cs
  4. +2
    -1
      LLama.Unittest/BasicTest.cs
  5. +2
    -2
      LLama.Unittest/BeamTests.cs
  6. +23
    -1
      LLama.Unittest/Constants.cs
  7. +1
    -0
      LLama.Unittest/GrammarTest.cs
  8. +1
    -0
      LLama.Unittest/LLamaContextTests.cs
  9. +27
    -24
      LLama.Unittest/LLamaEmbedderTests.cs
  10. +4
    -3
      LLama.Unittest/LLavaWeightsTests.cs
  11. +4
    -2
      LLama.Unittest/MemoryDisposalTests.cs
  12. +1
    -0
      LLama.Unittest/StatelessExecutorTest.cs
  13. +2
    -1
      LLama.Unittest/TokenTests.cs
  14. +12
    -4
      LLama/LLamaEmbedder.cs
  15. +4
    -4
      LLama/LLamaInstructExecutor.cs
  16. +6
    -6
      LLama/LLamaInteractExecutor.cs
  17. +1
    -1
      LLama/LLamaStatelessExecutor.cs
  18. +8
    -44
      LLama/Native/NativeApi.cs

+ 6
- 4
.github/workflows/main.yml View File

@@ -15,11 +15,14 @@ jobs:
strategy:
fail-fast: false
matrix:
build: [linux-release, windows-release]
build: [linux-release, windows-release, osx-release]
include:
- build: linux-release
os: ubuntu-latest
config: release
- build: osx-release
os: macos-14 # https://github.blog/changelog/2024-01-30-github-actions-introducing-the-new-m1-macos-runner-available-to-open-source/
config: release
- build: windows-release
os: windows-2019
config: release
@@ -27,8 +30,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-dotnet@v4
with:
dotnet-version: |
7.0.x
dotnet-version: |
8.0.x
- name: Cache Packages
uses: actions/cache@v4
@@ -43,7 +45,7 @@ jobs:
- name: Build
run: dotnet build LLamaSharp.sln -c ${{ matrix.config }} --no-restore
- name: Test
run: dotnet test LLamaSharp.sln -c ${{ matrix.config }} -l "console;verbosity=detailed" --diag:logs/log.txt
run: dotnet test LLamaSharp.sln -c ${{ matrix.config }} -l "console;verbosity=detailed" --diag:logs/log.txt --filter Category!=NoCI
- name: Upload artifacts
if: always()
uses: actions/upload-artifact@v3


+ 1
- 1
LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs View File

@@ -104,6 +104,6 @@ namespace LLamaSharp.KernelMemory
}

/// <inheritdoc/>
public int CountTokens(string text) => _embedder.Context.Tokenize(text).Length;
public int CountTokens(string text) => _embedder.Context.Tokenize(text, special: true).Length;
}
}

+ 1
- 7
LLama.KernelMemory/LlamaSharpTextGenerator.cs View File

@@ -1,13 +1,7 @@
using LLama;
using LLama.Abstractions;
using LLama.Common;
using LLama.Native;
using Microsoft.KernelMemory.AI;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace LLamaSharp.KernelMemory
{
@@ -111,6 +105,6 @@ namespace LLamaSharp.KernelMemory
}

/// <inheritdoc/>
public int CountTokens(string text) => _context.Tokenize(text).Length;
public int CountTokens(string text) => _context.Tokenize(text, special: true).Length;
}
}

+ 2
- 1
LLama.Unittest/BasicTest.cs View File

@@ -17,7 +17,8 @@ namespace LLama.Unittest
_testOutputHelper = testOutputHelper;
_params = new ModelParams(Constants.GenerativeModelPath)
{
ContextSize = 2048
ContextSize = 2048,
GpuLayerCount = Constants.CIGpuLayerCount
};
_model = LLamaWeights.LoadFromFile(_params);
}


+ 2
- 2
LLama.Unittest/BeamTests.cs View File

@@ -17,7 +17,8 @@ public sealed class BeamTests
_testOutputHelper = testOutputHelper;
_params = new ModelParams(Constants.GenerativeModelPath)
{
ContextSize = 2048
ContextSize = 2048,
GpuLayerCount = Constants.CIGpuLayerCount,
};
_model = LLamaWeights.LoadFromFile(_params);
}
@@ -27,7 +28,6 @@ public sealed class BeamTests
_model.Dispose();
}

//[Fact(Skip = "Very very slow in CI")]
[Fact]
public void BasicBeam()
{


+ 23
- 1
LLama.Unittest/Constants.cs View File

@@ -1,4 +1,6 @@
namespace LLama.Unittest
using System.Runtime.InteropServices;

namespace LLama.Unittest
{
internal static class Constants
{
@@ -8,5 +10,25 @@
public static readonly string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";
public static readonly string LLavaMmpPath = "Models/mmproj-model-f16.gguf";
public static readonly string LLavaImage = "Models/extreme-ironing-taxi-610x427.jpg";

/// <summary>
/// GPU layer count to use in unit tests.
/// </summary>
/// <returns>Defaults to 20 in all cases, except macOS release builds (to disable Metal on GitHub CI)</returns>
public static int CIGpuLayerCount
{
get
{
if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
#if DEBUG
return 20;
#else
return 0;
#endif
}
else return 20;
}
}
}
}

+ 1
- 0
LLama.Unittest/GrammarTest.cs View File

@@ -16,6 +16,7 @@ namespace LLama.Unittest
{
ContextSize = 2048,
Seed = 92,
GpuLayerCount = Constants.CIGpuLayerCount,
};
_model = LLamaWeights.LoadFromFile(_params);
}


+ 1
- 0
LLama.Unittest/LLamaContextTests.cs View File

@@ -14,6 +14,7 @@ namespace LLama.Unittest
var @params = new ModelParams(Constants.GenerativeModelPath)
{
ContextSize = 768,
GpuLayerCount = Constants.CIGpuLayerCount,
};
_weights = LLamaWeights.LoadFromFile(@params);
_context = _weights.CreateContext(@params);


+ 27
- 24
LLama.Unittest/LLamaEmbedderTests.cs View File

@@ -1,32 +1,15 @@
using LLama.Common;
using LLama.Native;
using LLama.Common;
using Xunit.Abstractions;
using Xunit.Sdk;

namespace LLama.Unittest;

public sealed class LLamaEmbedderTests
: IDisposable
{
private readonly ITestOutputHelper _testOutputHelper;
private readonly LLamaEmbedder _embedder;

public LLamaEmbedderTests(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
var @params = new ModelParams(Constants.EmbeddingModelPath)
{
ContextSize = 4096,
Threads = 5,
Embeddings = true,
};
using var weights = LLamaWeights.LoadFromFile(@params);
_embedder = new(weights, @params);
}

public void Dispose()
{
_embedder.Dispose();
}

private static float Dot(float[] a, float[] b)
@@ -35,17 +18,25 @@ public sealed class LLamaEmbedderTests
return a.Zip(b, (x, y) => x * y).Sum();
}


[Fact]
public async Task EmbedCompare()
private async Task CompareEmbeddings(string modelPath)
{
var cat = await _embedder.GetEmbeddings("The cat is cute");
var @params = new ModelParams(modelPath)
{
ContextSize = 8,
Threads = 4,
Embeddings = true,
GpuLayerCount = Constants.CIGpuLayerCount,
};
using var weights = LLamaWeights.LoadFromFile(@params);
using var embedder = new LLamaEmbedder(weights, @params);

var cat = await embedder.GetEmbeddings("The cat is cute");
Assert.DoesNotContain(float.NaN, cat);

var kitten = await _embedder.GetEmbeddings("The kitten is kawaii");
var kitten = await embedder.GetEmbeddings("The kitten is kawaii");
Assert.DoesNotContain(float.NaN, kitten);

var spoon = await _embedder.GetEmbeddings("The spoon is not real");
var spoon = await embedder.GetEmbeddings("The spoon is not real");
Assert.DoesNotContain(float.NaN, spoon);

_testOutputHelper.WriteLine($"Cat = [{string.Join(",", cat.AsMemory().Slice(0, 7).ToArray())}...]");
@@ -61,4 +52,16 @@ public sealed class LLamaEmbedderTests

Assert.True(close < far);
}

[Fact]
public async Task EmbedCompareEmbeddingModel()
{
await CompareEmbeddings(Constants.EmbeddingModelPath);
}

[Fact]
public async Task EmbedCompareGenerateModel()
{
await CompareEmbeddings(Constants.GenerativeModelPath);
}
}

+ 4
- 3
LLama.Unittest/LLavaWeightsTests.cs View File

@@ -17,7 +17,8 @@ namespace LLama.Unittest
var @params = new ModelParams(Constants.GenerativeModelPath)
{
// Llava models require a big context
ContextSize = 4096
ContextSize = 4096,
GpuLayerCount = Constants.CIGpuLayerCount,
};
_llamaWeights = LLamaWeights.LoadFromFile(@params);
_lLavaWeights = LLavaWeights.LoadFromFile(Constants.LLavaMmpPath);
@@ -32,7 +33,7 @@ namespace LLama.Unittest
_lLavaWeights.Dispose();
}
[Fact(Skip = "Very very slow in CI")]
[Fact,Trait("Category", "NoCI")]
public void EmbedImageAsFileName()
{
int n_past = 0;
@@ -40,7 +41,7 @@ namespace LLama.Unittest
Assert.True( _lLavaWeights.EvalImageEmbed( _context, emb, ref n_past ) );
}
[Fact(Skip = "Very very slow in CI")]
[Fact,Trait("Category", "NoCI")]
public void EmbedImageAsBinary()
{
int n_past = 0;


+ 4
- 2
LLama.Unittest/MemoryDisposalTests.cs View File

@@ -9,7 +9,8 @@ public class MemoryDisposalTests
{
var @params = new ModelParams(Constants.GenerativeModelPath)
{
ContextSize = 2048
ContextSize = 2048,
GpuLayerCount = 0,
};
var model = LLamaWeights.LoadFromFile(@params);

@@ -23,7 +24,8 @@ public class MemoryDisposalTests
{
var @params = new ModelParams(Constants.GenerativeModelPath)
{
ContextSize = 2048
ContextSize = 2048,
GpuLayerCount = Constants.CIGpuLayerCount,
};
var model = LLamaWeights.LoadFromFile(@params);



+ 1
- 0
LLama.Unittest/StatelessExecutorTest.cs View File

@@ -20,6 +20,7 @@ namespace LLama.Unittest
ContextSize = 60,
Seed = 1754,
BatchSize = 2,
GpuLayerCount = Constants.CIGpuLayerCount,
};
_weights = LLamaWeights.LoadFromFile(_params);
}


+ 2
- 1
LLama.Unittest/TokenTests.cs View File

@@ -14,7 +14,8 @@ public sealed class TokenTests
{
_params = new ModelParams(Constants.GenerativeModelPath)
{
ContextSize = 2048
ContextSize = 2048,
GpuLayerCount = Constants.CIGpuLayerCount,
};
_model = LLamaWeights.LoadFromFile(_params);
}


+ 12
- 4
LLama/LLamaEmbedder.cs View File

@@ -97,11 +97,18 @@ namespace LLama

private float[] GetEmbeddingsArray()
{
var embeddings = NativeApi.llama_get_embeddings_seq(Context.NativeHandle, LLamaSeqId.Zero);
if (embeddings.Length == 0)
return Array.Empty<float>();
unsafe
{
var embeddings = NativeApi.llama_get_embeddings(Context.NativeHandle);

if (embeddings == null)
embeddings = NativeApi.llama_get_embeddings_seq(Context.NativeHandle, LLamaSeqId.Zero);

return embeddings.ToArray();
if (embeddings == null)
return Array.Empty<float>();

return new Span<float>(embeddings, Context.EmbeddingSize).ToArray();
}
}

private static void Normalize(Span<float> embeddings)
@@ -112,6 +119,7 @@ namespace LLama
lengthSqr += value * value;
var length = (float)Math.Sqrt(lengthSqr);

// Do not divide by length if it is zero
if (length <= float.Epsilon)
return;



+ 4
- 4
LLama/LLamaInstructExecutor.cs View File

@@ -38,8 +38,8 @@ namespace LLama
ILogger? logger = null)
: base(context, logger)
{
_inp_pfx = Context.Tokenize(instructionPrefix, true);
_inp_sfx = Context.Tokenize(instructionSuffix, false);
_inp_pfx = Context.Tokenize(instructionPrefix, true, true);
_inp_sfx = Context.Tokenize(instructionSuffix, false, true);
_instructionPrefix = instructionPrefix;
}

@@ -124,7 +124,7 @@ namespace LLama
if (_is_prompt_run)
{
// When running the first input (prompt) in interactive mode, we should specially process it.
_embed_inps = Context.Tokenize(text, true).ToList();
_embed_inps = Context.Tokenize(text, true, true).ToList();
}
else
{
@@ -135,7 +135,7 @@ namespace LLama
_consumedTokensCount = _embed_inps.Count;
_embed_inps.AddRange(_inp_pfx);

var line_inp = Context.Tokenize(text, false);
var line_inp = Context.Tokenize(text, false, true);
_embed_inps.AddRange(line_inp);

_embed_inps.AddRange(_inp_sfx);


+ 6
- 6
LLama/LLamaInteractExecutor.cs View File

@@ -119,7 +119,7 @@ namespace LLama
// When running the first input (prompt) in interactive mode, we should specially process it.
if (!this.IsMultiModal)
{
_embed_inps = Context.Tokenize(text, true).ToList();
_embed_inps = Context.Tokenize(text, true, true).ToList();
}
else
{
@@ -135,7 +135,7 @@ namespace LLama

if (!this.IsMultiModal)
{
var line_inp = Context.Tokenize(text, false);
var line_inp = Context.Tokenize(text, false, true);
_embed_inps.AddRange(line_inp);
args.RemainedTokens -= line_inp.Length;
}
@@ -165,11 +165,11 @@ namespace LLama
int imageIndex = text.IndexOf("<image>");
// Tokenize segment 1 (before <image> tag)
string preImagePrompt = text.Substring(0, imageIndex);
var segment1 = Context.Tokenize(preImagePrompt, addBos );
var segment1 = Context.Tokenize(preImagePrompt, addBos, true);
// Remember the position to add the image embeddings
_EmbedImagePosition = segment1.Length;
string postImagePrompt = text.Substring(imageIndex + 7);
var segment2 = Context.Tokenize(postImagePrompt, false);
var segment2 = Context.Tokenize(postImagePrompt, false, true);
_embed_inps.AddRange(segment1);
_embed_inps.AddRange(segment2);
usedTokens += (segment1.Length + segment2.Length);
@@ -178,11 +178,11 @@ namespace LLama
{
if (addBos)
{
_embed_inps = Context.Tokenize(text, true).ToList();
_embed_inps = Context.Tokenize(text, true, true).ToList();
}
else
{
var line_inp = Context.Tokenize(text, false);
var line_inp = Context.Tokenize(text, false, true);
_embed_inps.AddRange(line_inp);
args.RemainedTokens -= line_inp.Length;
}


+ 1
- 1
LLama/LLamaStatelessExecutor.cs View File

@@ -90,7 +90,7 @@ namespace LLama
lastTokens.Add(0);

// Tokenize the prompt
var tokens = Context.Tokenize(prompt).ToList();
var tokens = Context.Tokenize(prompt, special: true).ToList();
lastTokens.AddRange(tokens);

// Evaluate the prompt, in chunks smaller than the max batch size


+ 8
- 44
LLama/Native/NativeApi.cs View File

@@ -137,41 +137,17 @@ namespace LLama.Native
/// Get the embeddings for a specific sequence.
/// Equivalent to: llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd
/// </summary>
/// <returns></returns>
public static Span<float> llama_get_embeddings_seq(SafeLLamaContextHandle ctx, LLamaSeqId id)
{
unsafe
{
var ptr = llama_get_embeddings_seq_native(ctx, id);
if (ptr == null)
return Array.Empty<float>();

return new Span<float>(ptr, ctx.EmbeddingSize);
}

[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "llama_get_embeddings_seq")]
static extern unsafe float* llama_get_embeddings_seq_native(SafeLLamaContextHandle ctx, LLamaSeqId id);
}
/// <returns>A pointer to the first float in an embedding, length = ctx.EmbeddingSize</returns>
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern unsafe float* llama_get_embeddings_seq(SafeLLamaContextHandle ctx, LLamaSeqId id);

/// <summary>
/// Get the embeddings for the ith output token.
/// Equivalent to: llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd
/// </summary>
/// <returns></returns>
public static Span<float> llama_get_embeddings_ith(SafeLLamaContextHandle ctx, int i)
{
unsafe
{
var ptr = llama_get_embeddings_ith_native(ctx, i);
if (ptr == null)
return Array.Empty<float>();

return new Span<float>(ptr, ctx.EmbeddingSize);
}

[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "llama_get_embeddings_ith")]
static extern unsafe float* llama_get_embeddings_ith_native(SafeLLamaContextHandle ctx, int i);
}
/// <returns>A pointer to the first float in an embedding, length = ctx.EmbeddingSize</returns>
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern unsafe float* llama_get_embeddings_ith(SafeLLamaContextHandle ctx, int i);

/// <summary>
/// Get all output token embeddings.
@@ -182,20 +158,8 @@ namespace LLama.Native
/// </summary>
/// <param name="ctx"></param>
/// <returns></returns>
public static Span<float> llama_get_embeddings(SafeLLamaContextHandle ctx)
{
unsafe
{
var ptr = llama_get_embeddings_native(ctx);
if (ptr == null)
return Array.Empty<float>();

return new Span<float>(ptr, ctx.EmbeddingSize);
}

[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "llama_get_embeddings")]
static extern unsafe float* llama_get_embeddings_native(SafeLLamaContextHandle ctx);
}
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern unsafe float* llama_get_embeddings(SafeLLamaContextHandle ctx);

/// <summary>
/// Apply chat template. Inspired by hf apply_chat_template() on python.


Loading…
Cancel
Save