diff --git a/README.md b/README.md index 47a93fa2..6935efe9 100644 --- a/README.md +++ b/README.md @@ -94,56 +94,49 @@ LLamaSharp provides two ways to run inference: `LLamaExecutor` and `ChatSession` using LLama.Common; using LLama; -string modelPath = ""; // change it to your own model path -var prompt = "Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.\r\n\r\nUser: Hello, Bob.\r\nBob: Hello. How may I help you today?\r\nUser: Please tell me the largest city in Europe.\r\nBob: Sure. The largest city in Europe is Moscow, the capital of Russia.\r\nUser:"; // use the "chat-with-bob" prompt here. +string modelPath = @""; // change it to your own model path. -// Load a model var parameters = new ModelParams(modelPath) { - ContextSize = 1024, - Seed = 1337, - GpuLayerCount = 5 + ContextSize = 1024, // The longest length of chat as memory. + GpuLayerCount = 5 // How many layers to offload to GPU. Please adjust it according to your GPU memory. }; using var model = LLamaWeights.LoadFromFile(parameters); - -// Initialize a chat session using var context = model.CreateContext(parameters); -var ex = new InteractiveExecutor(context); -ChatSession session = new ChatSession(ex); - -// show the prompt -Console.WriteLine(); -Console.Write(prompt); +var executor = new InteractiveExecutor(context); -// run the inference in a loop to chat with LLM -while (prompt != "stop") -{ - await foreach (var text in session.ChatAsync(new ChatHistory.Message(AuthorRole.User, prompt), new InferenceParams { Temperature = 0.6f, AntiPrompts = [ "User:" ] })) - { - Console.Write(text); - } - prompt = Console.ReadLine() ?? ""; -} +// Add chat histories as prompt to tell AI how to act. +var chatHistory = new ChatHistory(); +chatHistory.AddMessage(AuthorRole.System, "Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision."); +chatHistory.AddMessage(AuthorRole.User, "Hello, Bob."); +chatHistory.AddMessage(AuthorRole.Assistant, "Hello. How may I help you today?"); -// save the session -session.SaveSession("SavedSessionPath"); -``` +ChatSession session = new(executor, chatHistory); -#### Quantization +InferenceParams inferenceParams = new InferenceParams() +{ + MaxTokens = 256, // No more than 256 tokens should appear in answer. Remove it if antiprompt is enough for control. + AntiPrompts = new List<string> { "User:" } // Stop generation once antiprompts appear. }; -The following example shows how to quantize the model: +Console.ForegroundColor = ConsoleColor.Yellow; +Console.Write("The chat session has started.\nUser: "); +Console.ForegroundColor = ConsoleColor.Green; +string userInput = Console.ReadLine() ?? ""; -```cs -string srcFilename = ""; -string dstFilename = ""; -string ftype = "q4_0"; -if(Quantizer.Quantize(srcFileName, dstFilename, ftype)) -{ - Console.WriteLine("Quantization succeed!"); -} -else +while (userInput != "exit") { - Console.WriteLine("Quantization failed!"); + await foreach ( // Generate the response streamingly. + var text + in session.ChatAsync( + new ChatHistory.Message(AuthorRole.User, userInput), + inferenceParams)) + { + Console.ForegroundColor = ConsoleColor.White; + Console.Write(text); + } + Console.ForegroundColor = ConsoleColor.Green; + userInput = Console.ReadLine() ?? ""; } ```