diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj index f46a381d..9bc41b0b 100644 --- a/LLama.Examples/LLama.Examples.csproj +++ b/LLama.Examples/LLama.Examples.csproj @@ -29,6 +29,7 @@ + diff --git a/LLama.Examples/Program.cs b/LLama.Examples/Program.cs index b24ef406..53edfb3b 100644 --- a/LLama.Examples/Program.cs +++ b/LLama.Examples/Program.cs @@ -1,5 +1,6 @@ using LLama.Native; using Spectre.Console; +using System.Runtime.InteropServices; AnsiConsole.MarkupLineInterpolated( $""" @@ -16,23 +17,24 @@ AnsiConsole.MarkupLineInterpolated( """); -// Configure native library to use. This must be done before any other llama.cpp methods are called! -NativeLibraryConfig - .Instance - .WithCuda(); - // Configure logging. Change this to `true` to see log messages from llama.cpp -var showLLamaCppLogs = false; +var showLLamaCppLogs = true; NativeLibraryConfig - .Instance + .All .WithLogCallback((level, message) => - { - if (showLLamaCppLogs) - Console.WriteLine($"[llama {level}]: {message.TrimEnd('\n')}"); - }); + { + if (showLLamaCppLogs) + Console.WriteLine($"[llama {level}]: {message.TrimEnd('\n')}"); + }); + +// Configure native library to use. This must be done before any other llama.cpp methods are called! +NativeLibraryConfig + .All + .WithCuda() + //.WithAutoDownload() // An experimental feature + .DryRun(); // Calling this method forces loading to occur now. 
/// <summary>
/// Extension methods that switch a <see cref="NativeLibraryConfig"/> (or every config in a
/// <see cref="NativeLibraryConfigContainer"/>) to the experimental auto-download mode.
/// </summary>
public static class NativeLibraryAutoDownloadExtension
{
    /// <summary>
    /// Set whether to download the best-matched native library file automatically if there's no backend or specified file to load.
    /// You could add a setting here to customize the behavior of the download.
    /// <para>
    /// Enabling auto-download installs a <see cref="SelectingPolicyWithAutoDownload"/> as the selecting policy and
    /// appends the download directory (plus any extra search directories from the settings) to the search directories.
    /// A later call to <c>WithSelectingPolicy</c> replaces the policy installed here, so configure auto-download last.
    /// </para>
    /// </summary>
    /// <param name="config">The configuration to modify.</param>
    /// <param name="enable">When false the configuration is returned untouched.</param>
    /// <param name="settings">
    /// Download settings. The object passed in is never modified: a private copy is completed with the
    /// default tag and local directory and stored in the policy. When null, default settings are used.
    /// </param>
    /// <returns>The same <paramref name="config"/>, for chaining.</returns>
    /// <exception cref="InvalidOperationException">Thrown if the library has already been loaded.</exception>
    public static NativeLibraryConfig WithAutoDownload(this NativeLibraryConfig config, bool enable = true, NativeLibraryDownloadSettings? settings = null)
    {
        if (config.LibraryHasLoaded)
        {
            throw new InvalidOperationException("The library has already loaded, you can't change the configurations. " +
                "Please finish the configuration setting before any call to LLamaSharp native APIs. " +
                "Please use NativeLibraryConfig.DryRun if you want to see whether it's loaded successfully " +
                "but still have chance to modify the configurations.");
        }

        if (!enable)
        {
            return config;
        }

        // The fluent setters on NativeLibraryDownloadSettings mutate the instance they are called on,
        // so work on a copy: the caller's object (possibly shared between several configs) stays as it was.
        var effectiveSettings = settings is null
            ? NativeLibraryDownloadSettings.Create()
            : CloneSettings(settings);

        // Fill in the tag from the current version when the user did not choose one.
        if (string.IsNullOrEmpty(effectiveSettings.Tag))
        {
            effectiveSettings.WithTag(GetCommitHash(NativeLibraryConfig.CurrentVersion));
        }

        // Fill in the default local directory (it depends on the tag) when the user did not choose one.
        var localDir = effectiveSettings.LocalDir
            ?? NativeLibraryDownloadSettings.GetDefaultLocalDir(effectiveSettings.Tag);
        effectiveSettings.WithLocalDir(localDir);

        // Downloaded files are looked up in the local directory first, then in the user's extra directories.
        // WithSearchDirectories appends, so directories configured earlier remain in the list.
        List<string> searchDirectoriesForDownload = [localDir];
        searchDirectoriesForDownload.AddRange(effectiveSettings.ExtraSearchDirectories ?? []);
        config.WithSearchDirectories(searchDirectoriesForDownload);

        config.WithSelectingPolicy(new SelectingPolicyWithAutoDownload(effectiveSettings));
        return config;
    }

    /// <summary>
    /// Create an independent copy of <paramref name="source"/> using its public fluent setters.
    /// Arrays are copied as well so that later changes to the caller's arrays do not leak in.
    /// </summary>
    private static NativeLibraryDownloadSettings CloneSettings(NativeLibraryDownloadSettings source)
    {
        var copy = NativeLibraryDownloadSettings.Create()
            .WithEndpoint(source.Endpoint)
            .WithTag(source.Tag)
            .WithRepoId(source.RepoId)
            .WithCacheDir(source.CacheDir)
            .WithTimeout(source.Timeout);

        // The remaining properties are nullable; leave the defaults (null) in place when unset.
        if (source.EndpointFallbacks is not null)
        {
            copy.WithEndpointFallbacks([.. source.EndpointFallbacks]);
        }
        if (source.LocalDir is not null)
        {
            copy.WithLocalDir(source.LocalDir);
        }
        if (source.Token is not null)
        {
            copy.WithToken(source.Token);
        }
        if (source.ExtraSearchDirectories is not null)
        {
            copy.WithExtraSearchDirectories([.. source.ExtraSearchDirectories]);
        }

        return copy;
    }

    /// <summary>
    /// Map a version string to its commit hash through <c>NativeLibraryConfig.VersionMap</c>;
    /// an unknown version is returned unchanged so that it can be used as a tag directly.
    /// </summary>
    private static string GetCommitHash(string version)
    {
        return NativeLibraryConfig.VersionMap.TryGetValue(version, out var hash) ? hash : version;
    }

    /// <summary>
    /// Set whether to download the best-matched native library file automatically if there's no backend or specified file to load.
    /// Applies the single-config <c>WithAutoDownload</c> to every configuration in the container.
    /// Each configuration receives its own copy of <paramref name="settings"/>.
    /// </summary>
    /// <param name="container">The container whose configurations are modified.</param>
    /// <param name="enable">When false the configurations are left untouched.</param>
    /// <param name="settings">Download settings, or null for the defaults.</param>
    /// <returns>The same <paramref name="container"/>, for chaining.</returns>
    /// <exception cref="InvalidOperationException">Thrown if one of the libraries has already been loaded.</exception>
    public static NativeLibraryConfigContainer WithAutoDownload(this NativeLibraryConfigContainer container,
        bool enable = true, NativeLibraryDownloadSettings? settings = null)
    {
        foreach (var config in container.Configs)
        {
            config.WithAutoDownload(enable, settings);
        }
        return container;
    }
}
/// <summary>
/// Downloads native library files through <c>HFDownloader</c>, trying the main endpoint first
/// and then each fallback endpoint in the order given.
/// </summary>
internal class NativeLibraryDownloader
{
    /// <summary>
    /// Download the library file.
    /// </summary>
    /// <param name="settings">The download settings; <c>LocalDir</c> must already be set.</param>
    /// <param name="remoteFilePath">Path of the file inside the remote repository.</param>
    /// <param name="logCallback">Optional callback that receives progress and failure messages.</param>
    /// <returns>The local path of the file if successful otherwise null.</returns>
    /// <exception cref="InvalidOperationException">Thrown if <c>settings.LocalDir</c> is null.</exception>
    public static async Task<string?> DownloadLibraryFile(NativeLibraryDownloadSettings settings, string remoteFilePath, NativeLogConfig.LLamaLogCallback? logCallback = null)
    {
        if (settings.LocalDir is null)
        {
            // Null local directory is not expected here (it will make things more complex if we want to handle it).
            // It should always be set when gathering the description.
            throw new InvalidOperationException("Auto-download is enabled for native library but the `LocalDir` is null. " +
                "It's an unexpected behavior and please report an issue to LLamaSharp.");
        }

        // NOTE(review): this assigns a static member of HFGlobalConfig, so it presumably affects every
        // other user of HuggingfaceHub in the process - confirm that this is acceptable.
        HFGlobalConfig.DefaultDownloadTimeout = settings.Timeout;

        // Main endpoint first, then the fallbacks in their given order, without duplicates.
        // A list keeps that order explicit instead of relying on HashSet enumeration order.
        List<string> endpoints = [settings.Endpoint];
        foreach (var fallback in settings.EndpointFallbacks ?? Array.Empty<string>())
        {
            if (!endpoints.Contains(fallback))
            {
                endpoints.Add(fallback);
            }
        }

        // endpoint -> exception raised while downloading from it.
        Dictionary<string, Exception> exceptionMap = new();
        foreach (var endpoint in endpoints)
        {
            logCallback?.Invoke(LLamaLogLevel.Debug, $"Downloading the native library file '{remoteFilePath}' from {endpoint} with repo = {settings.RepoId}, tag = {settings.Tag}");
            try
            {
                // Callers block on this task with `.Result`, so do not capture the synchronization context.
                var path = await HFDownloader.DownloadFileAsync(settings.RepoId, remoteFilePath, revision: settings.Tag, cacheDir: settings.CacheDir,
                    localDir: settings.LocalDir, token: settings.Token, endpoint: endpoint).ConfigureAwait(false);
                if (path is not null)
                {
                    logCallback?.Invoke(LLamaLogLevel.Debug, $"Successfully downloaded the native library file to {path}");
                    return path;
                }

                logCallback?.Invoke(LLamaLogLevel.Warning, "The download failed without an explicit error, please check your configuration or report an issue to LLamaSharp.");
            }
            catch (Exception ex)
            {
                // A blocked or unreachable endpoint is exactly the case the fallbacks exist for, so an
                // exception must not abort the loop. The exception types HFDownloader can raise are not
                // visible from here, hence the broad catch; every failure is recorded and reported.
                exceptionMap[endpoint] = ex;
                logCallback?.Invoke(LLamaLogLevel.Warning, $"Failed to download the native library file '{remoteFilePath}' from {endpoint}: {ex.GetType().Name}: {ex.Message}");
            }
        }

        // means that the download finally fails.
        if (exceptionMap.Count > 0)
        {
            logCallback?.Invoke(LLamaLogLevel.Warning, $"Could not download the native library file '{remoteFilePath}' from any endpoint; errors were raised by: {string.Join(", ", exceptionMap.Keys)}");
        }
        return null;
    }
}
+ /// + /// Generally this is an option for those countries or regions where the main endpoint is blocked. + /// You should not put too many endpoints here, as it will slow down the downloading process. + /// + public string[]? EndpointFallbacks { get; private set; } = null; + + /// + /// The version of the library to download. Please use LLamaSharp version in format `[major].[minor].[patch]` as tag + /// or go to https://huggingface.co/AsakusaRinne/LLamaSharpNative + /// to see all available tags, or use your own repo and tags. + /// + public string Tag { get; private set; } = string.Empty; + + /// + /// The repo id to download the native library files. + /// + public string RepoId { get; private set; } = "AsakusaRinne/LLamaSharpNative"; + + /// + /// The directory to cache the downloaded files. If you only want to make the downloaded files appear in a directory, + /// regardless of whether the file will have a copy in another place, please set instead. + /// + public string CacheDir { get; private set; } + + /// + /// If provided, the downloaded file will be placed under this directory, + /// either as a symlink (default) or a regular file. + /// + public string? LocalDir { get; private set; } = null; + + /// + /// If you are using your own private repo as remote source, you could set the token to get the access. + /// + public string? Token { get; private set; } = null; + + /// + /// The timeout (second) of the native library file download. + /// + public int Timeout { get; private set; } = 10; + + /// + /// Extra search directories. They will only be used when finding files downloaded from remote. + /// Generally it will be useful when you wnat to replace the downloading process with your custom implementation. + /// If you are not sure how it works, please leave it empty. + /// + public string[]? 
ExtraSearchDirectories { get; private set; } = null; + + internal NativeLibraryDownloadSettings() + { + var home = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), ".cache"); + CacheDir = Path.Combine(home, "llama_sharp"); + } + + internal static string GetDefaultLocalDir(string tag) + { + var home = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile); + return Path.Combine(home, ".llama_sharp", tag); + } + + /// + /// Create a with default settings. + /// + /// + public static NativeLibraryDownloadSettings Create() + { + return new NativeLibraryDownloadSettings(); + } + + /// + /// Set the default endpoint to download file from. + /// + /// + /// + public NativeLibraryDownloadSettings WithEndpoint(string endpoint) + { + Endpoint = endpoint; + return this; + } + + /// + /// Set the endpoints to try when the download fails with the default endpoint. + /// + /// + /// + public NativeLibraryDownloadSettings WithEndpointFallbacks(params string[] endpoints) + { + EndpointFallbacks = endpoints; + return this; + } + + /// + /// Set the + /// + /// + /// + public NativeLibraryDownloadSettings WithTag(string tag) + { + Tag = tag; + return this; + } + + /// + /// Set the + /// + /// + /// + public NativeLibraryDownloadSettings WithRepoId(string repoId) + { + RepoId = repoId; + return this; + } + + /// + /// Set the . If you only want to make the downloaded files appear in a directory, + /// regardless of whether the file may have a copy in another place, please use instead. 
/// <summary>
/// Render the settings as a single line for logging.
/// The token is masked: tokens of up to 10 characters show only their last character,
/// longer tokens show their first two characters and their last character.
/// A null or empty token is rendered as an empty string.
/// </summary>
/// <returns>A human-readable description of these settings.</returns>
public override string ToString()
{
    // Token should be hidden when printing it.
    // An empty token needs its own case: masking it would request a string of -1 characters
    // and read the last character of an empty string, both of which throw.
    string hiddenToken = Token switch
    {
        null or "" => "",
        { Length: <= 10 } shortToken
            => new string('*', shortToken.Length - 1) + shortToken[shortToken.Length - 1],
        { } longToken
            => longToken.Substring(0, 2) + new string('*', longToken.Length - 3) + longToken[longToken.Length - 1],
    };

    return $"(Endpoint = {Endpoint}, " +
        $"EndpointFallbacks = {string.Join(", ", EndpointFallbacks ?? Array.Empty<string>())}, " +
        $"Tag = {Tag}, " +
        $"RepoId = {RepoId}, " +
        $"CacheDir = {CacheDir}, " +
        $"LocalDir = {LocalDir}, " +
        $"Token = {hiddenToken}, " +
        $"Timeout = {Timeout}s)";
}
logCallback) + { + Log($""" + Auto-download of native library has been enabled, with the following settings: + {_downloadSettings} + """, LLamaLogLevel.Info, logCallback); + foreach(var library in _defaultPolicy.Apply(description, systemInfo, logCallback)) + { + if(library is NativeLibraryWithCuda cudaLibrary) + { + yield return new AutoDownloadedLibraries.Cuda(cudaLibrary, _downloadSettings); + } + else if(library is NativeLibraryWithAvx avxLibrary) + { + yield return new AutoDownloadedLibraries.Avx(avxLibrary, _downloadSettings); + } + else if(library is NativeLibraryWithMacOrFallback macLibrary) + { + yield return new AutoDownloadedLibraries.MacOrFallback(macLibrary, _downloadSettings); + } + else if(library is NativeLibraryFromPath) + { + yield return library; // No need to download + } + else + { + Log($"Unknown native library type of auto-download: {library.GetType()}. " + + $"Please ignore this warning if you are using self-defined native library", LLamaLogLevel.Warning, logCallback); + yield return library; + } + } + } + + private void Log(string message, LLamaLogLevel level, NativeLogConfig.LLamaLogCallback? logCallback) + { + if (!message.EndsWith("\n")) + message += "\n"; + + logCallback?.Invoke(level, message); + } + } +#endif +} diff --git a/LLama/Abstractions/INativeLibrary.cs b/LLama/Abstractions/INativeLibrary.cs new file mode 100644 index 00000000..a7e00b75 --- /dev/null +++ b/LLama/Abstractions/INativeLibrary.cs @@ -0,0 +1,29 @@ +using LLama.Native; +using System; +using System.Collections.Generic; +using System.Text; + +namespace LLama.Abstractions +{ + /// + /// Descriptor of a native library. + /// + public interface INativeLibrary + { + /// + /// Metadata of this library. + /// + NativeLibraryMetadata? Metadata { get; } + + /// + /// Prepare the native library file and returns the local path of it. + /// If it's a relative path, LLamaSharp will search the path in the search directies you set. 
+ /// + /// The system information of the current machine. + /// The log callback. + /// + /// The relative paths of the library. You could return multiple paths to try them one by one. If no file is available, please return an empty array. + /// + IEnumerable Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback = null); + } +} diff --git a/LLama/Abstractions/INativeLibrarySelectingPolicy.cs b/LLama/Abstractions/INativeLibrarySelectingPolicy.cs new file mode 100644 index 00000000..41335202 --- /dev/null +++ b/LLama/Abstractions/INativeLibrarySelectingPolicy.cs @@ -0,0 +1,24 @@ +using LLama.Native; +using System; +using System.Collections.Generic; +using System.Text; + +namespace LLama.Abstractions +{ +#if NET6_0_OR_GREATER + /// + /// Decides the selected native library that should be loaded according to the configurations. + /// + public interface INativeLibrarySelectingPolicy + { + /// + /// Select the native library. + /// + /// + /// The system information of the current machine. + /// The log callback. + /// The information of the selected native library files, in order by priority from the beginning to the end. + IEnumerable Apply(NativeLibraryConfig.Description description, SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? 
/// <summary>
/// The default selecting policy: turns a <see cref="NativeLibraryConfig.Description"/> into an ordered
/// sequence of candidate native libraries, highest priority first.
/// </summary>
public class DefaultNativeLibrarySelectingPolicy: INativeLibrarySelectingPolicy
{
    /// <summary>
    /// Select the candidate native libraries.
    /// <para>
    /// An explicit path is the only candidate when one is configured. Otherwise the order is:
    /// CUDA (if requested), then the AVX variants, then the Mac/fallback library
    /// (on macOS, or whenever fallback is allowed).
    /// </para>
    /// </summary>
    /// <param name="description">The gathered configuration.</param>
    /// <param name="systemInfo">The system information of the current machine.</param>
    /// <param name="logCallback">The log callback; the description is logged once when enumeration starts.</param>
    /// <returns>The candidate libraries, in order by priority from the beginning to the end.</returns>
    public IEnumerable<INativeLibrary> Apply(NativeLibraryConfig.Description description, SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback)
    {
        // Show the configuration we're working with
        Log(description.ToString(), LLamaLogLevel.Info, logCallback);

        // If a specific path is requested, only use it, no fall back.
        if (!string.IsNullOrEmpty(description.Path))
        {
            yield return new NativeLibraryFromPath(description.Path);
            yield break;
        }

        if (description.UseCuda)
        {
            yield return new NativeLibraryWithCuda(systemInfo.CudaMajorVersion, description.Library, description.SkipCheck);
        }

        if (description.AllowFallback)
        {
            // Try all of the AVX levels we can support, from the most to the least capable.
            if (description.AvxLevel >= AvxLevel.Avx512)
            {
                yield return new NativeLibraryWithAvx(description.Library, AvxLevel.Avx512, description.SkipCheck);
            }

            if (description.AvxLevel >= AvxLevel.Avx2)
            {
                yield return new NativeLibraryWithAvx(description.Library, AvxLevel.Avx2, description.SkipCheck);
            }

            if (description.AvxLevel >= AvxLevel.Avx)
            {
                yield return new NativeLibraryWithAvx(description.Library, AvxLevel.Avx, description.SkipCheck);
            }

            yield return new NativeLibraryWithAvx(description.Library, AvxLevel.None, description.SkipCheck);
        }
        else if (!description.UseCuda)
        {
            // No fallback and no CUDA: exactly the requested AVX level is the CPU candidate.
            yield return new NativeLibraryWithAvx(description.Library, description.AvxLevel, description.SkipCheck);
        }

        if (systemInfo.OSPlatform == OSPlatform.OSX || description.AllowFallback)
        {
            yield return new NativeLibraryWithMacOrFallback(description.Library, description.SkipCheck);
        }
    }

    /// <summary>
    /// Forward a message to the callback, making sure it ends with a line feed.
    /// </summary>
    private static void Log(string message, LLamaLogLevel level, NativeLogConfig.LLamaLogCallback? logCallback)
    {
        // The char overload compares ordinally. The string overload is culture-sensitive and,
        // with ICU, reports that a message ending in "\r\n" does not end with "\n".
        if (!message.EndsWith('\n'))
        {
            message += "\n";
        }

        logCallback?.Invoke(level, message);
    }
}
+ /// + private readonly List _searchDirectories = new List(); + + internal INativeLibrarySelectingPolicy SelectingPolicy { get; private set; } = new DefaultNativeLibrarySelectingPolicy(); + + #region configurators + /// + /// Load a specified native library as backend for LLamaSharp. + /// When this method is called, all the other configurations will be ignored. + /// + /// The full path to the native library to load. + /// Thrown if `LibraryHasLoaded` is true. + public NativeLibraryConfig WithLibrary(string? libraryPath) + { + ThrowIfLoaded(); + + _libraryPath = libraryPath; + return this; + } + + /// + /// Configure whether to use cuda backend if possible. Default is true. + /// + /// + /// + /// Thrown if `LibraryHasLoaded` is true. + public NativeLibraryConfig WithCuda(bool enable = true) + { + ThrowIfLoaded(); + + _useCuda = enable; + return this; + } + + /// + /// Configure the prefferred avx support level of the backend. + /// Default value is detected automatically due to your operating system. + /// + /// + /// + /// Thrown if `LibraryHasLoaded` is true. + public NativeLibraryConfig WithAvx(AvxLevel level) + { + ThrowIfLoaded(); + + _avxLevel = level; + return this; + } + + /// + /// Configure whether to allow fallback when there's no match for preferred settings. Default is true. + /// + /// + /// + /// Thrown if `LibraryHasLoaded` is true. + public NativeLibraryConfig WithAutoFallback(bool enable = true) + { + ThrowIfLoaded(); + + _allowFallback = enable; + return this; + } + + /// + /// Whether to skip the check when you don't allow fallback. This option + /// may be useful under some complex conditions. For example, you're sure + /// you have your cublas configured but LLamaSharp take it as invalid by mistake. Default is false; + /// + /// + /// + /// Thrown if `LibraryHasLoaded` is true. 
+ public NativeLibraryConfig SkipCheck(bool enable = true) + { + ThrowIfLoaded(); + + _skipCheck = enable; + return this; + } + + /// + /// Add self-defined search directories. Note that the file stucture of the added + /// directories must be the same as the default directory. Besides, the directory + /// won't be used recursively. + /// + /// + /// + public NativeLibraryConfig WithSearchDirectories(IEnumerable directories) + { + ThrowIfLoaded(); + + _searchDirectories.AddRange(directories); + return this; + } + + /// + /// Add self-defined search directories. Note that the file stucture of the added + /// directories must be the same as the default directory. Besides, the directory + /// won't be used recursively. + /// + /// + /// + public NativeLibraryConfig WithSearchDirectory(string directory) + { + ThrowIfLoaded(); + + _searchDirectories.Add(directory); + return this; + } + + /// + /// Set the policy which decides how to select the desired native libraries and order them by priority. + /// By default we use . 
/// <summary>
/// Report whether the CPU offers the AVX-512 feature set required here:
/// AVX512F, AVX512BW, AVX512_VBMI and AVX512_VNNI must all be present.
/// </summary>
/// <returns>True only when every one of the four features is available.</returns>
private static bool CheckAVX512()
{
    // CPUID can only be queried on x86/x64.
    if (!System.Runtime.Intrinsics.X86.X86Base.IsSupported)
    {
        return false;
    }

    // CPUID leaf 7, sub-leaf 0.
    // ReSharper disable UnusedVariable (leafEbx is used when < NET8)
    var (_, leafEbx, leafEcx, _) = System.Runtime.Intrinsics.X86.X86Base.CpuId(7, 0);
    // ReSharper restore UnusedVariable

    // ECX bit 11: AVX512_VNNI. There is no IsSupported query used for it, so it always comes from CPUID.
    const int vnniMask = 0b_1000_0000_0000;
    bool hasVnni = (leafEcx & vnniMask) != 0;

#if NET8_0_OR_GREATER
    // From .NET 8 on, the runtime answers these directly.
    bool hasFoundation = System.Runtime.Intrinsics.X86.Avx512F.IsSupported;
    bool hasByteWord = System.Runtime.Intrinsics.X86.Avx512BW.IsSupported;
    bool hasVbmi = System.Runtime.Intrinsics.X86.Avx512Vbmi.IsSupported;
#else
    // Older targets read the feature bits from CPUID:
    // EBX bit 16 = AVX512F, EBX bit 30 = AVX512BW, ECX bit 1 = AVX512_VBMI.
    const int vbmiMask = 0b_0000_0000_0010;
    bool hasFoundation = (leafEbx & (1 << 16)) != 0;
    bool hasByteWord = (leafEbx & (1 << 30)) != 0;
    bool hasVbmi = (leafEcx & vbmiMask) != 0;
#endif

    if (!hasVnni || !hasVbmi)
    {
        return false;
    }

    return hasByteWord && hasFoundation;
}
But there's an exceptance that you can + /// use `master` as a version to get the llama.cpp commit hash from the master branch. + /// + public static Dictionary VersionMap { get; } = new Dictionary() + // This value should be changed when we're going to publish new release. (any better approach?) + { + {"master", "f7001c"} + }; + + /// + /// The current version. + /// + public static readonly string CurrentVersion = "master"; // This should be changed before publishing new version. TODO: any better approach? + + static NativeLibraryConfig() + { + LLama = new(NativeLibraryName.Llama); + LLavaShared = new(NativeLibraryName.LlavaShared); + All = new(LLama, LLavaShared); + Instance = All; + } + +#if NETSTANDARD2_0 + private NativeLibraryConfig(NativeLibraryName nativeLibraryName) + { + NativeLibraryName = nativeLibraryName; + } +#endif + + /// + /// Check if the native library has already been loaded. Configuration cannot be modified if this is true. + /// + public bool LibraryHasLoaded { get; internal set; } + + internal NativeLibraryName NativeLibraryName { get; } + + internal NativeLogConfig.LLamaLogCallback? LogCallback { get; private set; } = null; + + private void ThrowIfLoaded() + { + if (LibraryHasLoaded) + throw new InvalidOperationException("The library has already loaded, you can't change the configurations. " + + "Please finish the configuration setting before any call to LLamaSharp native APIs." + + "Please use NativeLibraryConfig.DryRun if you want to see whether it's loaded " + + "successfully but still have chance to modify the configurations."); + } + + /// + /// Set the log callback that will be used for all llama.cpp log messages + /// + /// + /// + public NativeLibraryConfig WithLogCallback(NativeLogConfig.LLamaLogCallback? 
callback) + { + ThrowIfLoaded(); + + LogCallback = callback; + return this; + } + + /// + /// Set the log callback that will be used for all llama.cpp log messages + /// + /// + /// + public NativeLibraryConfig WithLogCallback(ILogger? logger) + { + ThrowIfLoaded(); + + // Redirect to llama_log_set. This will wrap the logger in a delegate and bind that as the log callback instead. + NativeLogConfig.llama_log_set(logger); + + return this; + } + + /// + /// Try to load the native library with the current configurations, + /// but do not actually set it to . + /// + /// You can still modify the configuration after this calling but only before any call from . + /// + /// Whether the running is successful. + public bool DryRun() + { + LogCallback?.Invoke(LLamaLogLevel.Debug, $"Beginning dry run for {this.NativeLibraryName.GetLibraryName()}..."); + return NativeLibraryUtils.TryLoadLibrary(this) != IntPtr.Zero; + } + } + + /// + /// A class to set same configurations to multiple libraries at the same time. + /// + public sealed class NativeLibraryConfigContainer + { + private NativeLibraryConfig[] _configs; + + /// + /// All the configurations in this container. + /// Please avoid calling this property explicitly, use + /// and instead. + /// + public NativeLibraryConfig[] Configs => _configs; + + internal NativeLibraryConfigContainer(params NativeLibraryConfig[] configs) + { + _configs = configs; + } + + #region configurators + +#if NET6_0_OR_GREATER + /// + /// Load a specified native library as backend for LLamaSharp. + /// When this method is called, all the other configurations will be ignored. + /// + /// The full path to the llama library to load. + /// The full path to the llava library to load. + /// Thrown if `LibraryHasLoaded` is true. + public NativeLibraryConfigContainer WithLibrary(string? llamaPath, string? 
/// <summary>
/// Forward the cuda preference to every configuration held by this container.
/// </summary>
/// <param name="enable">Whether the cuda backend should be used if possible.</param>
/// <returns>This container, so calls can be chained.</returns>
/// <exception cref="InvalidOperationException">Thrown if `LibraryHasLoaded` is true.</exception>
public NativeLibraryConfigContainer WithCuda(bool enable = true)
{
    for (var index = 0; index < _configs.Length; index++)
    {
        _configs[index].WithCuda(enable);
    }

    return this;
}
public NativeLibraryConfigContainer WithSearchDirectories(IEnumerable<string> directories)
{
    // Add the given search directories to every configuration in this container.
    //
    // The sequence is materialized exactly once: passing the raw enumerable to each
    // configuration would enumerate it once per configuration, which is wasteful for lazy
    // queries and wrong for one-shot sequences (later configurations could see other data).
    var snapshot = directories.ToArray();

    foreach (var config in _configs)
    {
        config.WithSearchDirectories(snapshot);
    }

    // Returned to allow fluent chaining.
    return this;
}
/// <summary>
/// A native library specified with a local file path.
/// </summary>
public class NativeLibraryFromPath : INativeLibrary
{
    // Assigned once in the constructor and never changed afterwards.
    private readonly string _path;

    /// <summary>
    /// Always <c>null</c>: no metadata is reported for a library given by path.
    /// </summary>
    public NativeLibraryMetadata? Metadata => null;

    /// <summary>
    /// Create a library description that points at a single file.
    /// </summary>
    /// <param name="path">The path that <see cref="Prepare"/> returns as its only candidate.</param>
    public NativeLibraryFromPath(string path)
    {
        _path = path;
    }

    /// <summary>
    /// Return the configured path as the only candidate to load.
    /// </summary>
    /// <param name="systemInfo">Not used by this implementation.</param>
    /// <param name="logCallback">Not used by this implementation.</param>
    /// <returns>A sequence containing exactly the path passed to the constructor.</returns>
    public IEnumerable<string> Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback)
    {
        return [_path];
    }
}
public record class NativeLibraryMetadata(NativeLibraryName NativeLibraryName, bool UseCuda, AvxLevel AvxLevel)
{
    // Replaces the compiler-generated record text with a compact, parenthesized form.
    public override string ToString()
        => $"(NativeLibraryName: {NativeLibraryName}, UseCuda: {UseCuda}, AvxLevel: {AvxLevel})";
}
.so/.dll/.dylib, libName prefix or not) + NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var ext, out var libPrefix); + Log($"Detected OS Platform: '{systemInfo.OSPlatform}'", LLamaLogLevel.Info, config.LogCallback); + Log($"Detected OS string: '{os}'", LLamaLogLevel.Debug, config.LogCallback); + Log($"Detected extension string: '{ext}'", LLamaLogLevel.Debug, config.LogCallback); + Log($"Detected prefix string: '{libPrefix}'", LLamaLogLevel.Debug, config.LogCallback); + + // Set the flag to ensure this config can no longer be modified + config.LibraryHasLoaded = true; + + // Show the configuration we're working with + Log(description.ToString(), LLamaLogLevel.Info, config.LogCallback); + + // Get the libraries ordered by priority from the selecting policy. + var libraries = config.SelectingPolicy.Apply(description, systemInfo, config.LogCallback); + + foreach (var library in libraries) + { + // Prepare the local library file and get the path. + var paths = library.Prepare(systemInfo, config.LogCallback); + foreach (var path in paths) + { + Log($"Got relative library path '{path}' from local with {library.Metadata}, trying to load it...", LLamaLogLevel.Debug, config.LogCallback); + + var result = TryLoad(path, description.SearchDirectories, config.LogCallback); + if (result != IntPtr.Zero) + { + return result; + } + } + } + + // If fallback is allowed, we will make the last try (the default system loading) when calling the native api. + // Otherwise we throw an exception here. + if (!description.AllowFallback) + { + throw new RuntimeError("Failed to load the native library. Please check the log for more information."); + } +#endif + + Log($"No library was loaded before calling native apis. " + + $"This is not an error under netstandard2.0 but needs attention with net6 or higher.", LLamaLogLevel.Warning, config.LogCallback); + return IntPtr.Zero; + +#if NET6_0_OR_GREATER + // Try to load a DLL from the path. 
// Resolve a relative file name to an existing file. The configured search directories are
// probed first, then the application base directory and the directory of the executing
// assembly. If no probe hits, the file name is returned unchanged.
private static string TryFindPath(string filename, IEnumerable<string> searchDirectories)
{
    var configuredHit = FirstExisting(searchDirectories, filename);
    if (configuredHit is not null)
    {
        return configuredHit;
    }

    // Built only after the configured directories have been exhausted.
    var fallbackRoots = new[]
    {
        AppDomain.CurrentDomain.BaseDirectory,
        Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) ?? ""
    };

    return FirstExisting(fallbackRoots, filename) ?? filename;

    // First "<root>/<file>" that exists on disk, or null when none does.
    static string? FirstExisting(IEnumerable<string> roots, string file)
    {
        foreach (var root in roots)
        {
            var candidate = Path.Combine(root, file);
            if (File.Exists(candidate))
            {
                return candidate;
            }
        }

        return null;
    }
}
/// <summary>
/// Map an OS platform to the pieces used to build a native library path.
/// </summary>
/// <param name="platform">The platform to describe.</param>
/// <param name="os">Runtime folder name, e.g. "win-x64", "linux-x64", "osx-arm64" or "osx-x64".</param>
/// <param name="fileExtension">Library file extension including the dot.</param>
/// <param name="libPrefix">File name prefix: empty on Windows, "lib" on Linux and OSX.</param>
/// <exception cref="RuntimeError">Thrown for any platform other than Windows, Linux or OSX.</exception>
public static void GetPlatformPathParts(OSPlatform platform, out string os, out string fileExtension, out string libPrefix)
{
    if (platform == OSPlatform.Windows)
    {
        (os, fileExtension, libPrefix) = ("win-x64", ".dll", "");
    }
    else if (platform == OSPlatform.Linux)
    {
        (os, fileExtension, libPrefix) = ("linux-x64", ".so", "lib");
    }
    else if (platform == OSPlatform.OSX)
    {
        // The runtime folder on OSX depends on whether Arm64 intrinsics are supported.
        var macRuntime = System.Runtime.Intrinsics.Arm.ArmBase.Arm64.IsSupported
            ? "osx-arm64"
            : "osx-x64";
        (os, fileExtension, libPrefix) = (macRuntime, ".dylib", "lib");
    }
    else
    {
        throw new RuntimeError("Your operating system is not supported, please open an issue in LLamaSharp.");
    }
}
/// <summary>
/// A native library compiled with cublas/cuda.
/// </summary>
public class NativeLibraryWithCuda : INativeLibrary
{
    private int _majorCudaVersion;
    private NativeLibraryName _libraryName;
    private AvxLevel _avxLevel;
    private bool _skipCheck;

    /// <summary>
    /// Metadata describing this library; a new instance is created on every access.
    /// </summary>
    public NativeLibraryMetadata? Metadata => new NativeLibraryMetadata(_libraryName, true, _avxLevel);

    /// <summary>
    /// Describe a cuda build of the given library.
    /// </summary>
    /// <param name="majorCudaVersion">Major cuda version; -1 is treated as "not detected" by <see cref="Prepare"/>.</param>
    /// <param name="libraryName">Which native library this instance refers to.</param>
    /// <param name="skipCheck">When true, the platform check is bypassed and, for version -1, cuda12 then cuda11 are offered.</param>
    public NativeLibraryWithCuda(int majorCudaVersion, NativeLibraryName libraryName, bool skipCheck)
    {
        _majorCudaVersion = majorCudaVersion;
        _libraryName = libraryName;
        _skipCheck = skipCheck;
    }

    /// <summary>
    /// Lazily produce the relative paths of the cuda library candidates, in priority order.
    /// </summary>
    /// <param name="systemInfo">Supplies the OS platform used for the check and the path parts.</param>
    /// <param name="logCallback">Passed through to the path helper.</param>
    /// <returns>Zero, one or two relative library paths.</returns>
    public IEnumerable<string> Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback)
    {
        // TODO: Avx level is ignored now, needs to be implemented in the future.
        var platformHasCuda = systemInfo.OSPlatform == OSPlatform.Windows
                              || systemInfo.OSPlatform == OSPlatform.Linux;
        if (!platformHasCuda && !_skipCheck)
        {
            yield break;
        }

        int[] versionsToTry;
        if (_majorCudaVersion != -1)
        {
            versionsToTry = new[] { _majorCudaVersion };
        }
        else if (_skipCheck)
        {
            // Currently only 11 and 12 are supported.
            versionsToTry = new[] { 12, 11 };
        }
        else
        {
            yield break;
        }

        foreach (var cudaVersion in versionsToTry)
        {
            var candidate = GetCudaPath(systemInfo, cudaVersion, logCallback);
            if (candidate is not null)
            {
                yield return candidate;
            }
        }
    }

    // Build "runtimes/<os>/native/cuda<version>/<prefix><name><ext>" for the current platform.
    private string? GetCudaPath(SystemInfo systemInfo, int cudaVersion, NativeLogConfig.LLamaLogCallback? logCallback)
    {
        NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var fileExtension, out var libPrefix);
        return $"runtimes/{os}/native/cuda{cudaVersion}/{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}";
    }
}
// Build the relative library path for the current platform. On OSX there is no AVX
// sub-directory; elsewhere the sub-directory for AvxLevel.None is used (the avxLevel
// argument itself is not read).
private string? GetPath(SystemInfo systemInfo, AvxLevel avxLevel, NativeLogConfig.LLamaLogCallback? logCallback)
{
    NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var fileExtension, out var libPrefix);

    string? avxSegment = string.Empty;
    if (systemInfo.OSPlatform != OSPlatform.OSX)
    {
        var avxName = NativeLibraryConfig.AvxLevelToString(AvxLevel.None);
        avxSegment = string.IsNullOrEmpty(avxName) ? avxName : avxName + "/";
    }

    return $"runtimes/{os}/native/{avxSegment}{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}";
}
/// <summary>
/// Detect the major version of the installed CUDA toolkit from its version file.
/// </summary>
/// <returns>The major version parsed from the version string, or -1 when nothing was detected.</returns>
private static int GetCudaMajorVersion()
{
    string version = "";
    if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
    {
        var cudaPath = Environment.GetEnvironmentVariable("CUDA_PATH");
        if (cudaPath is null)
        {
            return -1;
        }

        //Ensuring cuda bin path is reachable. Especially for MAUI environment.
        string cudaBinPath = Path.Combine(cudaPath, "bin");
        if (Directory.Exists(cudaBinPath))
        {
            AddDllDirectory(cudaBinPath);
        }

        version = GetCudaVersionFromPath(cudaPath);
    }
    else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
    {
        // Try the defaults first. The first entry is the location probed historically; the
        // second is the conventional toolkit install location and is only an extra probe.
        foreach (var defaultRoot in new[] { "/usr/local/bin/cuda", "/usr/local/cuda" })
        {
            version = GetCudaVersionFromPath(defaultRoot);
            if (!string.IsNullOrEmpty(version))
            {
                break;
            }
        }

        if (string.IsNullOrEmpty(version))
        {
            var libraryPath = Environment.GetEnvironmentVariable("LD_LIBRARY_PATH");
            if (libraryPath is null)
            {
                return -1;
            }

            foreach (var path in libraryPath.Split(':'))
            {
                // The parent of each LD_LIBRARY_PATH entry is probed for the version file.
                version = GetCudaVersionFromPath(Path.Combine(path, ".."));

                // Stop at the first entry that yields a version. The previous condition was
                // inverted: it stopped on the first miss and let later misses overwrite a hit.
                if (!string.IsNullOrEmpty(version))
                {
                    break;
                }
            }
        }
    }

    if (string.IsNullOrEmpty(version))
    {
        return -1;
    }

    // Only the part before the first '.' is the major version.
    version = version.Split('.')[0];
    if (int.TryParse(version, out var majorVersion))
    {
        return majorVersion;
    }

    return -1;
}
JsonElement cublasNode = root.GetProperty("libcublas"); + JsonElement versionNode = cublasNode.GetProperty("version"); + if (versionNode.ValueKind == JsonValueKind.Undefined) + { + return string.Empty; + } + return versionNode.GetString() ?? ""; + } + } + catch (Exception) + { + return string.Empty; + } + } + + // Put it here to avoid calling NativeApi when getting the cuda version. + [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)] + internal static extern int AddDllDirectory(string NewDirectory); + + private const string cudaVersionFile = "version.json"; + #endregion + } +} diff --git a/LLama/Native/NativeApi.Load.cs b/LLama/Native/NativeApi.Load.cs index 4b4beea2..277555e7 100644 --- a/LLama/Native/NativeApi.Load.cs +++ b/LLama/Native/NativeApi.Load.cs @@ -18,7 +18,8 @@ namespace LLama.Native SetDllImportResolver(); // Set flag to indicate that this point has been passed. No native library config can be done after this point. - NativeLibraryConfig.LibraryHasLoaded = true; + NativeLibraryConfig.LLama.LibraryHasLoaded = true; + NativeLibraryConfig.LLavaShared.LibraryHasLoaded = true; // Immediately make a call which requires loading the llama DLL. This method call // can't fail unless the DLL hasn't been loaded. 
@@ -38,8 +39,8 @@ namespace LLama.Native } // Now that the "loaded" flag is set configure logging in llama.cpp - if (NativeLibraryConfig.Instance.LogCallback != null) - NativeLogConfig.llama_log_set(NativeLibraryConfig.Instance.LogCallback); + if (NativeLibraryConfig.LLama.LogCallback != null) + NativeLogConfig.llama_log_set(NativeLibraryConfig.LLama.LogCallback); // Init llama.cpp backend llama_backend_init(); @@ -64,7 +65,7 @@ namespace LLama.Native return _loadedLlamaHandle; // Try to load a preferred library, based on CPU feature detection - _loadedLlamaHandle = TryLoadLibraries(LibraryName.Llama); + _loadedLlamaHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLama); return _loadedLlamaHandle; } @@ -75,7 +76,7 @@ namespace LLama.Native return _loadedLlavaSharedHandle; // Try to load a preferred library, based on CPU feature detection - _loadedLlavaSharedHandle = TryLoadLibraries(LibraryName.LlavaShared); + _loadedLlavaSharedHandle = NativeLibraryUtils.TryLoadLibrary(NativeLibraryConfig.LLavaShared); return _loadedLlavaSharedHandle; } @@ -85,341 +86,6 @@ namespace LLama.Native #endif } - private static void Log(string message, LLamaLogLevel level) - { - if (!message.EndsWith("\n")) - message += "\n"; - - NativeLibraryConfig.Instance.LogCallback?.Invoke(level, message); - } - - #region CUDA version - private static int GetCudaMajorVersion() - { - string? cudaPath; - string version = ""; - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) - { - cudaPath = Environment.GetEnvironmentVariable("CUDA_PATH"); - if (cudaPath is null) - { - return -1; - } - - //Ensuring cuda bin path is reachable. Especially for MAUI environment. 
- string cudaBinPath = Path.Combine(cudaPath, "bin"); - - if (Directory.Exists(cudaBinPath)) - { - AddDllDirectory(cudaBinPath); - } - - version = GetCudaVersionFromPath(cudaPath); - } - else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) - { - // Try the default first - cudaPath = "/usr/local/bin/cuda"; - version = GetCudaVersionFromPath(cudaPath); - if (string.IsNullOrEmpty(version)) - { - cudaPath = Environment.GetEnvironmentVariable("LD_LIBRARY_PATH"); - if (cudaPath is null) - { - return -1; - } - foreach (var path in cudaPath.Split(':')) - { - version = GetCudaVersionFromPath(Path.Combine(path, "..")); - if (string.IsNullOrEmpty(version)) - { - break; - } - } - } - } - - if (string.IsNullOrEmpty(version)) - return -1; - - version = version.Split('.')[0]; - if (int.TryParse(version, out var majorVersion)) - return majorVersion; - - return -1; - } - - private static string GetCudaVersionFromPath(string cudaPath) - { - try - { - string json = File.ReadAllText(Path.Combine(cudaPath, cudaVersionFile)); - using (JsonDocument document = JsonDocument.Parse(json)) - { - JsonElement root = document.RootElement; - JsonElement cublasNode = root.GetProperty("libcublas"); - JsonElement versionNode = cublasNode.GetProperty("version"); - if (versionNode.ValueKind == JsonValueKind.Undefined) - { - return string.Empty; - } - return versionNode.GetString() ?? ""; - } - } - catch (Exception) - { - return string.Empty; - } - } - #endregion - -#if NET6_0_OR_GREATER - private static IEnumerable GetLibraryTryOrder(NativeLibraryConfig.Description configuration) - { - var loadingName = configuration.Library.GetLibraryName(); - Log($"Loading library: '{loadingName}'", LLamaLogLevel.Debug); - - // Get platform specific parts of the path (e.g. 
.so/.dll/.dylib, libName prefix or not) - GetPlatformPathParts(out var platform, out var os, out var ext, out var libPrefix); - Log($"Detected OS Platform: '{platform}'", LLamaLogLevel.Info); - Log($"Detected OS string: '{os}'", LLamaLogLevel.Debug); - Log($"Detected extension string: '{ext}'", LLamaLogLevel.Debug); - Log($"Detected prefix string: '{libPrefix}'", LLamaLogLevel.Debug); - - if (configuration.UseCuda && (platform == OSPlatform.Windows || platform == OSPlatform.Linux)) - { - var cudaVersion = GetCudaMajorVersion(); - Log($"Detected cuda major version {cudaVersion}.", LLamaLogLevel.Info); - - if (cudaVersion == -1 && !configuration.AllowFallback) - { - // if check skipped, we just try to load cuda libraries one by one. - if (configuration.SkipCheck) - { - yield return GetCudaLibraryPath(loadingName, "cuda12"); - yield return GetCudaLibraryPath(loadingName, "cuda11"); - } - else - { - throw new RuntimeError("Configured to load a cuda library but no cuda detected on your device."); - } - } - else if (cudaVersion == 11) - { - yield return GetCudaLibraryPath(loadingName, "cuda11"); - } - else if (cudaVersion == 12) - { - yield return GetCudaLibraryPath(loadingName, "cuda12"); - } - else if (cudaVersion > 0) - { - throw new RuntimeError($"Cuda version {cudaVersion} hasn't been supported by LLamaSharp, please open an issue for it."); - } - - // otherwise no cuda detected but allow fallback - } - - // Add the CPU/Metal libraries - if (platform == OSPlatform.OSX) - { - // On Mac it's very simple, there's no AVX to consider. - yield return GetMacLibraryPath(loadingName); - } - else - { - if (configuration.AllowFallback) - { - // Try all of the AVX levels we can support. 
- if (configuration.AvxLevel >= NativeLibraryConfig.AvxLevel.Avx512) - yield return GetAvxLibraryPath(loadingName, NativeLibraryConfig.AvxLevel.Avx512); - - if (configuration.AvxLevel >= NativeLibraryConfig.AvxLevel.Avx2) - yield return GetAvxLibraryPath(loadingName, NativeLibraryConfig.AvxLevel.Avx2); - - if (configuration.AvxLevel >= NativeLibraryConfig.AvxLevel.Avx) - yield return GetAvxLibraryPath(loadingName, NativeLibraryConfig.AvxLevel.Avx); - - yield return GetAvxLibraryPath(loadingName, NativeLibraryConfig.AvxLevel.None); - } - else - { - // Fallback is not allowed - use the exact specified AVX level - yield return GetAvxLibraryPath(loadingName, configuration.AvxLevel); - } - } - } - - private static string GetMacLibraryPath(string libraryName) - { - GetPlatformPathParts(out _, out var os, out var fileExtension, out var libPrefix); - - return $"runtimes/{os}/native/{libPrefix}{libraryName}{fileExtension}"; - } - - /// - /// Given a CUDA version and some path parts, create a complete path to the library file - /// - /// Library being loaded (e.g. "llama") - /// CUDA version (e.g. "cuda11") - /// - private static string GetCudaLibraryPath(string libraryName, string cuda) - { - GetPlatformPathParts(out _, out var os, out var fileExtension, out var libPrefix); - - return $"runtimes/{os}/native/{cuda}/{libPrefix}{libraryName}{fileExtension}"; - } - - /// - /// Given an AVX level and some path parts, create a complete path to the library file - /// - /// Library being loaded (e.g. 
"llama") - /// - /// - private static string GetAvxLibraryPath(string libraryName, NativeLibraryConfig.AvxLevel avx) - { - GetPlatformPathParts(out _, out var os, out var fileExtension, out var libPrefix); - - var avxStr = NativeLibraryConfig.AvxLevelToString(avx); - if (!string.IsNullOrEmpty(avxStr)) - avxStr += "/"; - - return $"runtimes/{os}/native/{avxStr}{libPrefix}{libraryName}{fileExtension}"; - } - - private static void GetPlatformPathParts(out OSPlatform platform, out string os, out string fileExtension, out string libPrefix) - { - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) - { - platform = OSPlatform.Windows; - os = "win-x64"; - fileExtension = ".dll"; - libPrefix = ""; - return; - } - - if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) - { - platform = OSPlatform.Linux; - os = "linux-x64"; - fileExtension = ".so"; - libPrefix = "lib"; - return; - } - - if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) - { - platform = OSPlatform.OSX; - fileExtension = ".dylib"; - - os = System.Runtime.Intrinsics.Arm.ArmBase.Arm64.IsSupported - ? 
"osx-arm64" - : "osx-x64"; - libPrefix = "lib"; - } - else - { - throw new RuntimeError("Your operating system is not supported, please open an issue in LLamaSharp."); - } - } -#endif - - /// - /// Try to load libllama/llava_shared, using CPU feature detection to try and load a more specialised DLL if possible - /// - /// The library handle to unload later, or IntPtr.Zero if no library was loaded - private static IntPtr TryLoadLibraries(LibraryName lib) - { -#if NET6_0_OR_GREATER - var configuration = NativeLibraryConfig.CheckAndGatherDescription(lib); - - // Set the flag to ensure the NativeLibraryConfig can no longer be modified - NativeLibraryConfig.LibraryHasLoaded = true; - - // Show the configuration we're working with - Log(configuration.ToString(), LLamaLogLevel.Info); - - // If a specific path is requested, load that or immediately fail - if (!string.IsNullOrEmpty(configuration.Path)) - { - if (!NativeLibrary.TryLoad(configuration.Path, out var handle)) - throw new RuntimeError($"Failed to load the native library [{configuration.Path}] you specified."); - - Log($"Successfully loaded the library [{configuration.Path}] specified by user", LLamaLogLevel.Info); - return handle; - } - - // Get a list of locations to try loading (in order of preference) - var libraryTryLoadOrder = GetLibraryTryOrder(configuration); - - foreach (var libraryPath in libraryTryLoadOrder) - { - var fullPath = TryFindPath(libraryPath); - Log($"Trying '{fullPath}'", LLamaLogLevel.Debug); - - var result = TryLoad(fullPath); - if (result != IntPtr.Zero) - { - Log($"Loaded '{fullPath}'", LLamaLogLevel.Info); - return result; - } - - Log($"Failed Loading '{fullPath}'", LLamaLogLevel.Info); - } - - if (!configuration.AllowFallback) - { - throw new RuntimeError("Failed to load the library that match your rule, please" + - " 1) check your rule." + - " 2) try to allow fallback." 
+ - " 3) or open an issue if it's expected to be successful."); - } -#endif - - Log($"No library was loaded before calling native apis. " + - $"This is not an error under netstandard2.0 but needs attention with net6 or higher.", LLamaLogLevel.Warning); - return IntPtr.Zero; - -#if NET6_0_OR_GREATER - // Try to load a DLL from the path. - // Returns null if nothing is loaded. - static IntPtr TryLoad(string path) - { - if (NativeLibrary.TryLoad(path, out var handle)) - return handle; - - return IntPtr.Zero; - } - - // Try to find the given file in any of the possible search paths - string TryFindPath(string filename) - { - // Try the configured search directories in the configuration - foreach (var path in configuration.SearchDirectories) - { - var candidate = Path.Combine(path, filename); - if (File.Exists(candidate)) - return candidate; - } - - // Try a few other possible paths - var possiblePathPrefix = new[] { - AppDomain.CurrentDomain.BaseDirectory, - Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) ?? 
"" - }; - - foreach (var path in possiblePathPrefix) - { - var candidate = Path.Combine(path, filename); - if (File.Exists(candidate)) - return candidate; - } - - return filename; - } -#endif - } - internal const string libraryName = "llama"; internal const string llavaLibraryName = "llava_shared"; private const string cudaVersionFile = "version.json"; diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs index ed456151..d57b00e7 100644 --- a/LLama/Native/NativeApi.cs +++ b/LLama/Native/NativeApi.cs @@ -19,9 +19,6 @@ namespace LLama.Native llama_max_devices(); } - [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)] - private static extern int AddDllDirectory(string NewDirectory); - /// /// Get the maximum number of devices supported by llama.cpp /// diff --git a/LLama/Native/NativeLibraryConfig.cs b/LLama/Native/NativeLibraryConfig.cs deleted file mode 100644 index ef7cd7c1..00000000 --- a/LLama/Native/NativeLibraryConfig.cs +++ /dev/null @@ -1,332 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using Microsoft.Extensions.Logging; - -namespace LLama.Native -{ -#if NET6_0_OR_GREATER - /// - /// Allows configuration of the native llama.cpp libraries to load and use. - /// All configuration must be done before using **any** other LLamaSharp methods! - /// - public sealed partial class NativeLibraryConfig - { - private string? _libraryPath; - private string? _libraryPathLLava; - - private bool _useCuda = true; - private AvxLevel _avxLevel; - private bool _allowFallback = true; - private bool _skipCheck = false; - - /// - /// search directory -> priority level, 0 is the lowest. - /// - private readonly List _searchDirectories = new List(); - - #region configurators - /// - /// Load a specified native library as backend for LLamaSharp. - /// When this method is called, all the other configurations will be ignored. - /// - /// The full path to the llama library to load. 
- /// The full path to the llava library to load. - /// Thrown if `LibraryHasLoaded` is true. - public NativeLibraryConfig WithLibrary(string? llamaPath, string? llavaPath) - { - ThrowIfLoaded(); - - _libraryPath = llamaPath; - _libraryPathLLava = llavaPath; - return this; - } - - /// - /// Configure whether to use cuda backend if possible. - /// - /// - /// - /// Thrown if `LibraryHasLoaded` is true. - public NativeLibraryConfig WithCuda(bool enable = true) - { - ThrowIfLoaded(); - - _useCuda = enable; - return this; - } - - /// - /// Configure the prefferred avx support level of the backend. - /// - /// - /// - /// Thrown if `LibraryHasLoaded` is true. - public NativeLibraryConfig WithAvx(AvxLevel level) - { - ThrowIfLoaded(); - - _avxLevel = level; - return this; - } - - /// - /// Configure whether to allow fallback when there's no match for preferred settings. - /// - /// - /// - /// Thrown if `LibraryHasLoaded` is true. - public NativeLibraryConfig WithAutoFallback(bool enable = true) - { - ThrowIfLoaded(); - - _allowFallback = enable; - return this; - } - - /// - /// Whether to skip the check when you don't allow fallback. This option - /// may be useful under some complex conditions. For example, you're sure - /// you have your cublas configured but LLamaSharp take it as invalid by mistake. - /// - /// - /// - /// Thrown if `LibraryHasLoaded` is true. - public NativeLibraryConfig SkipCheck(bool enable = true) - { - ThrowIfLoaded(); - - _skipCheck = enable; - return this; - } - - /// - /// Add self-defined search directories. Note that the file stucture of the added - /// directories must be the same as the default directory. Besides, the directory - /// won't be used recursively. - /// - /// - /// - public NativeLibraryConfig WithSearchDirectories(IEnumerable directories) - { - ThrowIfLoaded(); - - _searchDirectories.AddRange(directories); - return this; - } - - /// - /// Add self-defined search directories. 
Note that the file stucture of the added - /// directories must be the same as the default directory. Besides, the directory - /// won't be used recursively. - /// - /// - /// - public NativeLibraryConfig WithSearchDirectory(string directory) - { - ThrowIfLoaded(); - - _searchDirectories.Add(directory); - return this; - } - #endregion - - internal static Description CheckAndGatherDescription(LibraryName library) - { - if (Instance._allowFallback && Instance._skipCheck) - throw new ArgumentException("Cannot skip the check when fallback is allowed."); - - var path = library switch - { - LibraryName.Llama => Instance._libraryPath, - LibraryName.LlavaShared => Instance._libraryPathLLava, - _ => throw new ArgumentException($"Unknown library name '{library}'", nameof(library)), - }; - - return new Description( - path, - library, - Instance._useCuda, - Instance._avxLevel, - Instance._allowFallback, - Instance._skipCheck, - Instance._searchDirectories.Concat(new[] { "./" }).ToArray() - ); - } - - internal static string AvxLevelToString(AvxLevel level) - { - return level switch - { - AvxLevel.None => string.Empty, - AvxLevel.Avx => "avx", - AvxLevel.Avx2 => "avx2", - AvxLevel.Avx512 => "avx512", - _ => throw new ArgumentException($"Unknown AvxLevel '{level}'") - }; - } - - /// - /// Private constructor prevents new instances of this class being created - /// - private NativeLibraryConfig() - { - // Automatically detect the highest supported AVX level - if (System.Runtime.Intrinsics.X86.Avx.IsSupported) - _avxLevel = AvxLevel.Avx; - if (System.Runtime.Intrinsics.X86.Avx2.IsSupported) - _avxLevel = AvxLevel.Avx2; - - if (CheckAVX512()) - _avxLevel = AvxLevel.Avx512; - } - - private static bool CheckAVX512() - { - if (!System.Runtime.Intrinsics.X86.X86Base.IsSupported) - return false; - - // ReSharper disable UnusedVariable (ebx is used when < NET8) - var (_, ebx, ecx, _) = System.Runtime.Intrinsics.X86.X86Base.CpuId(7, 0); - // ReSharper restore UnusedVariable - - var vnni = 
(ecx & 0b_1000_0000_0000) != 0; - -#if NET8_0_OR_GREATER - var f = System.Runtime.Intrinsics.X86.Avx512F.IsSupported; - var bw = System.Runtime.Intrinsics.X86.Avx512BW.IsSupported; - var vbmi = System.Runtime.Intrinsics.X86.Avx512Vbmi.IsSupported; -#else - var f = (ebx & (1 << 16)) != 0; - var bw = (ebx & (1 << 30)) != 0; - var vbmi = (ecx & 0b_0000_0000_0010) != 0; -#endif - - return vnni && vbmi && bw && f; - } - - /// - /// Avx support configuration - /// - public enum AvxLevel - { - /// - /// No AVX - /// - None, - - /// - /// Advanced Vector Extensions (supported by most processors after 2011) - /// - Avx, - - /// - /// AVX2 (supported by most processors after 2013) - /// - Avx2, - - /// - /// AVX512 (supported by some processors after 2016, not widely supported) - /// - Avx512, - } - - internal record Description(string? Path, LibraryName Library, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, string[] SearchDirectories) - { - public override string ToString() - { - string avxLevelString = AvxLevel switch - { - AvxLevel.None => "NoAVX", - AvxLevel.Avx => "AVX", - AvxLevel.Avx2 => "AVX2", - AvxLevel.Avx512 => "AVX512", - _ => "Unknown" - }; - - string searchDirectoriesString = "{ " + string.Join(", ", SearchDirectories) + " }"; - - return $"NativeLibraryConfig Description:\n" + - $"- LibraryName: {Library}\n" + - $"- Path: '{Path}'\n" + - $"- PreferCuda: {UseCuda}\n" + - $"- PreferredAvxLevel: {avxLevelString}\n" + - $"- AllowFallback: {AllowFallback}\n" + - $"- SkipCheck: {SkipCheck}\n" + - $"- SearchDirectories and Priorities: {searchDirectoriesString}"; - } - } - } -#endif - - public sealed partial class NativeLibraryConfig - { - /// - /// Get the config instance - /// - public static NativeLibraryConfig Instance { get; } = new(); - - /// - /// Check if the native library has already been loaded. Configuration cannot be modified if this is true. 
- /// - public static bool LibraryHasLoaded { get; internal set; } - - internal NativeLogConfig.LLamaLogCallback? LogCallback; - - private static void ThrowIfLoaded() - { - if (LibraryHasLoaded) - throw new InvalidOperationException("NativeLibraryConfig must be configured before using **any** other LLamaSharp methods!"); - } - - /// - /// Set the log callback that will be used for all llama.cpp log messages - /// - /// - /// - public NativeLibraryConfig WithLogCallback(NativeLogConfig.LLamaLogCallback? callback) - { - ThrowIfLoaded(); - - LogCallback = callback; - return this; - } - - /// - /// Set the log callback that will be used for all llama.cpp log messages - /// - /// - /// - public NativeLibraryConfig WithLogCallback(ILogger? logger) - { - ThrowIfLoaded(); - - // Redirect to llama_log_set. This will wrap the logger in a delegate and bind that as the log callback instead. - NativeLogConfig.llama_log_set(logger); - - return this; - } - } - - internal enum LibraryName - { - Llama, - LlavaShared - } - - internal static class LibraryNameExtensions - { - public static string GetLibraryName(this LibraryName name) - { - switch (name) - { - case LibraryName.Llama: - return NativeApi.libraryName; - case LibraryName.LlavaShared: - return NativeApi.llavaLibraryName; - default: - throw new ArgumentOutOfRangeException(nameof(name), name, null); - } - } - } -} diff --git a/LLama/Native/NativeLogConfig.cs b/LLama/Native/NativeLogConfig.cs index ebcd23d4..82b097fb 100644 --- a/LLama/Native/NativeLogConfig.cs +++ b/LLama/Native/NativeLogConfig.cs @@ -37,7 +37,7 @@ public static class NativeLogConfig public static void llama_log_set(LLamaLogCallback? 
logCallback) #pragma warning restore IDE1006 // Naming Styles { - if (NativeLibraryConfig.LibraryHasLoaded) + if (NativeLibraryConfig.LLama.LibraryHasLoaded) { // The library is loaded, just pass the callback directly to llama.cpp native_llama_log_set(logCallback); diff --git a/LLamaSharp.sln b/LLamaSharp.sln index 76334657..065a1e36 100644 --- a/LLamaSharp.sln +++ b/LLamaSharp.sln @@ -17,100 +17,165 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp.SemanticKernel", EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp.KernelMemory", "LLama.KernelMemory\LLamaSharp.KernelMemory.csproj", "{E5589AE7-B86F-4343-A1CC-8E5D34596E52}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.Experimental", "LLama.Experimental\LLama.Experimental.csproj", "{BE4F977B-D4D9-472F-B506-EAE17542A810}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU + Debug|Arm64 = Debug|Arm64 Debug|x64 = Debug|x64 GPU|Any CPU = GPU|Any CPU + GPU|Arm64 = GPU|Arm64 GPU|x64 = GPU|x64 Release|Any CPU = Release|Any CPU + Release|Arm64 = Release|Arm64 Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.Debug|Arm64.ActiveCfg = Debug|Any CPU + {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.Debug|Arm64.Build.0 = Debug|Any CPU {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.Debug|x64.ActiveCfg = Debug|x64 {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.Debug|x64.Build.0 = Debug|x64 {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.GPU|Any CPU.ActiveCfg = Release|Any CPU {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.GPU|Any CPU.Build.0 = Release|Any CPU + {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.GPU|Arm64.ActiveCfg = Release|Any CPU + {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.GPU|Arm64.Build.0 = 
Release|Any CPU {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.GPU|x64.ActiveCfg = Release|x64 {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.GPU|x64.Build.0 = Release|x64 {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.Release|Any CPU.ActiveCfg = Release|Any CPU {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.Release|Any CPU.Build.0 = Release|Any CPU + {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.Release|Arm64.ActiveCfg = Release|Any CPU + {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.Release|Arm64.Build.0 = Release|Any CPU {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.Release|x64.ActiveCfg = Release|x64 {BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E}.Release|x64.Build.0 = Release|x64 {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.Debug|Any CPU.Build.0 = Debug|Any CPU + {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.Debug|Arm64.ActiveCfg = Debug|Any CPU + {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.Debug|Arm64.Build.0 = Debug|Any CPU {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.Debug|x64.ActiveCfg = Debug|x64 {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.Debug|x64.Build.0 = Debug|x64 {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.GPU|Any CPU.ActiveCfg = Release|Any CPU {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.GPU|Any CPU.Build.0 = Release|Any CPU + {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.GPU|Arm64.ActiveCfg = Release|Any CPU + {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.GPU|Arm64.Build.0 = Release|Any CPU {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.GPU|x64.ActiveCfg = Release|x64 {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.GPU|x64.Build.0 = Release|x64 {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.Release|Any CPU.ActiveCfg = Release|Any CPU {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.Release|Any CPU.Build.0 = Release|Any CPU + {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.Release|Arm64.ActiveCfg = Release|Any CPU + {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.Release|Arm64.Build.0 = Release|Any CPU {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.Release|x64.ActiveCfg = Release|x64 {BD1909AD-E1F8-476E-BC49-E394FF0470CE}.Release|x64.Build.0 = 
Release|x64 {01A12D68-DE95-425E-AEEE-2D099305036D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {01A12D68-DE95-425E-AEEE-2D099305036D}.Debug|Any CPU.Build.0 = Debug|Any CPU + {01A12D68-DE95-425E-AEEE-2D099305036D}.Debug|Arm64.ActiveCfg = Debug|Arm64 + {01A12D68-DE95-425E-AEEE-2D099305036D}.Debug|Arm64.Build.0 = Debug|Arm64 {01A12D68-DE95-425E-AEEE-2D099305036D}.Debug|x64.ActiveCfg = Debug|x64 {01A12D68-DE95-425E-AEEE-2D099305036D}.Debug|x64.Build.0 = Debug|x64 {01A12D68-DE95-425E-AEEE-2D099305036D}.GPU|Any CPU.ActiveCfg = GPU|Any CPU {01A12D68-DE95-425E-AEEE-2D099305036D}.GPU|Any CPU.Build.0 = GPU|Any CPU + {01A12D68-DE95-425E-AEEE-2D099305036D}.GPU|Arm64.ActiveCfg = GPU|Arm64 + {01A12D68-DE95-425E-AEEE-2D099305036D}.GPU|Arm64.Build.0 = GPU|Arm64 {01A12D68-DE95-425E-AEEE-2D099305036D}.GPU|x64.ActiveCfg = GPU|x64 {01A12D68-DE95-425E-AEEE-2D099305036D}.GPU|x64.Build.0 = GPU|x64 {01A12D68-DE95-425E-AEEE-2D099305036D}.Release|Any CPU.ActiveCfg = Release|Any CPU {01A12D68-DE95-425E-AEEE-2D099305036D}.Release|Any CPU.Build.0 = Release|Any CPU + {01A12D68-DE95-425E-AEEE-2D099305036D}.Release|Arm64.ActiveCfg = Release|Arm64 + {01A12D68-DE95-425E-AEEE-2D099305036D}.Release|Arm64.Build.0 = Release|Arm64 {01A12D68-DE95-425E-AEEE-2D099305036D}.Release|x64.ActiveCfg = Release|x64 {01A12D68-DE95-425E-AEEE-2D099305036D}.Release|x64.Build.0 = Release|x64 {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.Debug|Arm64.ActiveCfg = Debug|Any CPU + {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.Debug|Arm64.Build.0 = Debug|Any CPU {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.Debug|x64.ActiveCfg = Debug|Any CPU {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.Debug|x64.Build.0 = Debug|Any CPU {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.GPU|Any CPU.ActiveCfg = Debug|Any CPU {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.GPU|Any CPU.Build.0 = Debug|Any CPU + 
{D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.GPU|Arm64.ActiveCfg = Release|Any CPU + {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.GPU|Arm64.Build.0 = Release|Any CPU {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.GPU|x64.ActiveCfg = Debug|Any CPU {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.GPU|x64.Build.0 = Debug|Any CPU {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.Release|Any CPU.ActiveCfg = Release|Any CPU {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.Release|Any CPU.Build.0 = Release|Any CPU + {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.Release|Arm64.ActiveCfg = Release|Any CPU + {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.Release|Arm64.Build.0 = Release|Any CPU {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.Release|x64.ActiveCfg = Release|Any CPU {D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}.Release|x64.Build.0 = Release|Any CPU {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Debug|Arm64.ActiveCfg = Debug|Any CPU + {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Debug|Arm64.Build.0 = Debug|Any CPU {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Debug|x64.ActiveCfg = Debug|Any CPU {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Debug|x64.Build.0 = Debug|Any CPU {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.GPU|Any CPU.ActiveCfg = Debug|Any CPU {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.GPU|Any CPU.Build.0 = Debug|Any CPU + {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.GPU|Arm64.ActiveCfg = Release|Any CPU + {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.GPU|Arm64.Build.0 = Release|Any CPU {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.GPU|x64.ActiveCfg = Debug|Any CPU {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.GPU|x64.Build.0 = Debug|Any CPU {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|Any CPU.ActiveCfg = Release|Any CPU {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|Any CPU.Build.0 = Release|Any CPU + {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|Arm64.ActiveCfg = Release|Any CPU + 
{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|Arm64.Build.0 = Release|Any CPU {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|x64.ActiveCfg = Release|Any CPU {C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|x64.Build.0 = Release|Any CPU {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Arm64.ActiveCfg = Debug|Arm64 + {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Arm64.Build.0 = Debug|Arm64 {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|x64.ActiveCfg = Debug|Any CPU {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|x64.Build.0 = Debug|Any CPU {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Any CPU.ActiveCfg = Debug|Any CPU {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Any CPU.Build.0 = Debug|Any CPU + {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Arm64.ActiveCfg = GPU|Arm64 + {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Arm64.Build.0 = GPU|Arm64 {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|x64.ActiveCfg = Debug|Any CPU {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|x64.Build.0 = Debug|Any CPU {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Any CPU.ActiveCfg = Release|Any CPU {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Any CPU.Build.0 = Release|Any CPU + {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Arm64.ActiveCfg = Release|Arm64 + {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Arm64.Build.0 = Release|Arm64 {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|x64.ActiveCfg = Release|Any CPU {D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|x64.Build.0 = Release|Any CPU {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.Debug|Arm64.ActiveCfg = Debug|Arm64 + {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.Debug|Arm64.Build.0 = Debug|Arm64 {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.Debug|x64.ActiveCfg = Debug|Any CPU 
{E5589AE7-B86F-4343-A1CC-8E5D34596E52}.Debug|x64.Build.0 = Debug|Any CPU {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.GPU|Any CPU.ActiveCfg = Debug|Any CPU {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.GPU|Any CPU.Build.0 = Debug|Any CPU + {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.GPU|Arm64.ActiveCfg = GPU|Arm64 + {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.GPU|Arm64.Build.0 = GPU|Arm64 {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.GPU|x64.ActiveCfg = Debug|Any CPU {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.GPU|x64.Build.0 = Debug|Any CPU {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.Release|Any CPU.ActiveCfg = Release|Any CPU {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.Release|Any CPU.Build.0 = Release|Any CPU + {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.Release|Arm64.ActiveCfg = Release|Arm64 + {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.Release|Arm64.Build.0 = Release|Arm64 {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.Release|x64.ActiveCfg = Release|Any CPU {E5589AE7-B86F-4343-A1CC-8E5D34596E52}.Release|x64.Build.0 = Release|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.Debug|Any CPU.Build.0 = Debug|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.Debug|Arm64.ActiveCfg = Debug|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.Debug|Arm64.Build.0 = Debug|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.Debug|x64.ActiveCfg = Debug|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.Debug|x64.Build.0 = Debug|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.GPU|Any CPU.ActiveCfg = Debug|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.GPU|Any CPU.Build.0 = Debug|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.GPU|Arm64.ActiveCfg = Debug|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.GPU|Arm64.Build.0 = Debug|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.GPU|x64.ActiveCfg = Debug|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.GPU|x64.Build.0 = Debug|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.Release|Any CPU.ActiveCfg = Release|Any 
CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.Release|Any CPU.Build.0 = Release|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.Release|Arm64.ActiveCfg = Release|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.Release|Arm64.Build.0 = Release|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.Release|x64.ActiveCfg = Release|Any CPU + {BE4F977B-D4D9-472F-B506-EAE17542A810}.Release|x64.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE