{ "Logging": { "LogLevel": { "Default": "Information", "Microsoft.AspNetCore": "Warning" } }, "AllowedHosts": "*", "LLamaOptions": { "ModelLoadType": 0, "Models": [ { "Name": "LLama2-7b-Chat", "MaxInstances": 20, "ModelPath": "..\\LLama.Unittest\\Models\\llama-2-7b-chat.Q4_0.gguf", "ContextSize": 2048, "BatchSize": 2048, "Threads": 4, "GpuLayerCount": 6, "UseMemorymap": true, "UseMemoryLock": false, "MainGpu": 0, "LowVram": false, "Seed": 1686349486, "UseFp16Memory": true, "Perplexity": false, "LoraAdapter": "", "LoraBase": "", "EmbeddingMode": false, "TensorSplits": null, "GroupedQueryAttention": 1, "RmsNormEpsilon": 0.000005, "RopeFrequencyBase": 10000.0, "RopeFrequencyScale": 1.0, "MulMatQ": false, "Encoding": "UTF-8" } ] } }