You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

LLamaSharpTextEmbeddingGenerator.cs 4.2 kB

April 2024 Binary Update (#662) * Updated binaries, using [this build](https://github.com/SciSharp/LLamaSharp/actions/runs/8654672719/job/23733195669) for llama.cpp commit `f7001ccc5aa359fcf41bba19d1c99c3d25c9bcc7`. - Added all new functions. - Moved some functions (e.g. `SafeLlamaModelHandle` specific functions) into `SafeLlamaModelHandle.cs` - Exposed tokens on `SafeLlamaModelHandle` and `LLamaWeights` through a `Tokens` property. As new special tokens are added in the future they can be added here. - Changed all token properties to return nullable tokens, to handle some models not having some tokens. - Fixed `DefaultSamplingPipeline` to handle no newline token in some models. * Moved native methods to more specific locations. - Context specific things have been moved into `SafeLLamaContextHandle.cs` and made private - they're exposed through C# properties and methods already. - Checking that GPU layer count is zero if GPU offload is not supported. - Moved methods for creating default structs (`llama_model_quantize_default_params` and `llama_context_default_params`) into relevant structs. * Removed exception if `GpuLayerCount > 0` when GPU is not supported. * - Added low level wrapper methods for new per-sequence state load/save in `SafeLLamaContextHandle` - Added high level wrapper methods (save/load with `State` object or memory mapped file) in `LLamaContext` - Moved native methods for per-sequence state load/save into `SafeLLamaContextHandle` * Added update and defrag methods for KV cache in `SafeLLamaContextHandle` * Updated submodule to `f7001ccc5aa359fcf41bba19d1c99c3d25c9bcc7` * Passing the sequence ID when saving a single sequence state
2 years ago
April 2024 Binary Update (#662) * Updated binaries, using [this build](https://github.com/SciSharp/LLamaSharp/actions/runs/8654672719/job/23733195669) for llama.cpp commit `f7001ccc5aa359fcf41bba19d1c99c3d25c9bcc7`. - Added all new functions. - Moved some functions (e.g. `SafeLlamaModelHandle` specific functions) into `SafeLlamaModelHandle.cs` - Exposed tokens on `SafeLlamaModelHandle` and `LLamaWeights` through a `Tokens` property. As new special tokens are added in the future they can be added here. - Changed all token properties to return nullable tokens, to handle some models not having some tokens. - Fixed `DefaultSamplingPipeline` to handle no newline token in some models. * Moved native methods to more specific locations. - Context specific things have been moved into `SafeLLamaContextHandle.cs` and made private - they're exposed through C# properties and methods already. - Checking that GPU layer count is zero if GPU offload is not supported. - Moved methods for creating default structs (`llama_model_quantize_default_params` and `llama_context_default_params`) into relevant structs. * Removed exception if `GpuLayerCount > 0` when GPU is not supported. * - Added low level wrapper methods for new per-sequence state load/save in `SafeLLamaContextHandle` - Added high level wrapper methods (save/load with `State` object or memory mapped file) in `LLamaContext` - Moved native methods for per-sequence state load/save into `SafeLLamaContextHandle` * Added update and defrag methods for KV cache in `SafeLLamaContextHandle` * Updated submodule to `f7001ccc5aa359fcf41bba19d1c99c3d25c9bcc7` * Passing the sequence ID when saving a single sequence state
2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. using LLama;
  2. using LLama.Common;
  3. using LLama.Native;
  4. using Microsoft.KernelMemory;
  5. using Microsoft.KernelMemory.AI;
  6. namespace LLamaSharp.KernelMemory
  7. {
  8. /// <summary>
  9. /// Provides text embedding generation for LLamaSharp.
  10. /// </summary>
  11. public class LLamaSharpTextEmbeddingGenerator
  12. : ITextEmbeddingGenerator, IDisposable
  13. {
  14. private readonly LLamaSharpConfig? _config;
  15. private readonly LLamaWeights? _weights;
  16. private readonly LLamaEmbedder _embedder;
  17. private bool _ownsEmbedder = false;
  18. private bool _ownsWeights = false;
  19. /// <inheritdoc/>
  20. public int MaxTokens => (int?)_config?.ContextSize ?? 2048;
  21. /// <summary>
  22. /// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGenerator"/> class.
  23. /// </summary>
  24. /// <param name="config">The configuration for LLamaSharp.</param>
  25. public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
  26. {
  27. this._config = config;
  28. var @params = new ModelParams(_config.ModelPath)
  29. {
  30. ContextSize = config.ContextSize ?? 2048,
  31. Seed = config.Seed ?? 0,
  32. GpuLayerCount = config.GpuLayerCount ?? 20,
  33. Embeddings = true,
  34. MainGpu = _config.MainGpu,
  35. SplitMode = _config.SplitMode
  36. };
  37. _weights = LLamaWeights.LoadFromFile(@params);
  38. _embedder = new LLamaEmbedder(_weights, @params);
  39. _ownsWeights = true;
  40. _ownsEmbedder = true;
  41. }
  42. /// <summary>
  43. /// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGenerator"/> class from reused weights.
  44. /// </summary>
  45. /// <param name="config">The configuration for LLamaSharp.</param>
  46. /// <param name="weights">A LLamaWeights object.</param>
  47. public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights weights)
  48. {
  49. this._config = config;
  50. var @params = new ModelParams(_config.ModelPath)
  51. {
  52. ContextSize = config.ContextSize ?? 2048,
  53. Seed = config.Seed ?? 0,
  54. GpuLayerCount = config.GpuLayerCount ?? 20,
  55. Embeddings = true,
  56. MainGpu = _config.MainGpu,
  57. SplitMode = _config.SplitMode
  58. };
  59. _weights = weights;
  60. _embedder = new LLamaEmbedder(_weights, @params);
  61. _ownsEmbedder = true;
  62. }
  63. /// <summary>
  64. /// Initializes a new instance of the <see cref="LLamaSharpTextEmbeddingGenerator"/> class from reused embedder.
  65. /// </summary>
  66. /// <param name="embedder">A LLamaEmbedder object.</param>
  67. public LLamaSharpTextEmbeddingGenerator(LLamaEmbedder embedder)
  68. {
  69. this._config = null;
  70. this._weights = null;
  71. _embedder = embedder;
  72. }
  73. /// <inheritdoc/>
  74. public void Dispose()
  75. {
  76. if (_ownsWeights)
  77. {
  78. _weights?.Dispose();
  79. }
  80. if (_ownsEmbedder)
  81. {
  82. _embedder.Dispose();
  83. }
  84. }
  85. /// <inheritdoc/>
  86. public async Task<IList<ReadOnlyMemory<float>>> GenerateEmbeddingsAsync(IList<string> data, CancellationToken cancellationToken = default)
  87. {
  88. IList<ReadOnlyMemory<float>> results = new List<ReadOnlyMemory<float>>();
  89. foreach (var d in data)
  90. {
  91. var embeddings = await _embedder.GetEmbeddings(d, cancellationToken);
  92. results.Add(new ReadOnlyMemory<float>(embeddings));
  93. }
  94. return results;
  95. }
  96. /// <inheritdoc/>
  97. public async Task<Embedding> GenerateEmbeddingAsync(string text, CancellationToken cancellationToken = default)
  98. {
  99. var embeddings = await _embedder.GetEmbeddings(text, cancellationToken);
  100. return new Embedding(embeddings);
  101. }
  102. /// <inheritdoc/>
  103. public int CountTokens(string text) => _embedder.Context.Tokenize(text, special: true).Length;
  104. }
  105. }