```json
{
  "Logging": {
    "LogLevel": {
      "Default": "Information",
      "Microsoft.AspNetCore": "Warning"
    }
  },
  "AllowedHosts": "*",
  "LLamaOptions": {
    "ModelLoadType": 0,
    "Models": [
      {
        "Name": "LLama2-7b-Chat",
        "MaxInstances": 20,
        "ModelPath": "..\\LLama.Unittest\\Models\\llama-2-7b-chat.Q4_0.gguf",
        "ContextSize": 2048,
        "BatchSize": 2048,
        "Threads": 4,
        "GpuLayerCount": 6,
        "UseMemorymap": true,
        "UseMemoryLock": false,
        "MainGpu": 0,
        "LowVram": false,
        "Seed": 1686349486,
        "UseFp16Memory": true,
        "Perplexity": false,
        "LoraAdapter": "",
        "LoraBase": "",
        "EmbeddingMode": false,
        "TensorSplits": null,
        "GroupedQueryAttention": 1,
        "RmsNormEpsilon": 0.000005,
        "RopeFrequencyBase": 10000.0,
        "RopeFrequencyScale": 1.0,
        "MulMatQ": false,
        "Encoding": "UTF-8"
      }
    ]
  }
}
```
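
The sketch below shows one way such a `"LLamaOptions"` section could be bound with the standard ASP.NET Core options pattern. The `LLamaOptions` and `ModelOptions` classes and the `/models` endpoint here are illustrative assumptions whose property names simply mirror the JSON keys above; they are not necessarily the types the library itself defines.

```csharp
// Program.cs — minimal sketch, assuming hypothetical LLamaOptions/ModelOptions
// POCOs shaped like the JSON section above.
using Microsoft.Extensions.Options;

var builder = WebApplication.CreateBuilder(args);

// Bind the "LLamaOptions" section of appsettings.json to a strongly typed class.
builder.Services.Configure<LLamaOptions>(
    builder.Configuration.GetSection("LLamaOptions"));

var app = builder.Build();

// Example endpoint that reads the bound options.
app.MapGet("/models", (IOptions<LLamaOptions> options) =>
    options.Value.Models.Select(m => new { m.Name, m.MaxInstances, m.ContextSize }));

app.Run();

// Hypothetical option classes; property names match the configuration keys.
public class LLamaOptions
{
    public int ModelLoadType { get; set; }
    public List<ModelOptions> Models { get; set; } = new();
}

public class ModelOptions
{
    public string Name { get; set; } = "";
    public int MaxInstances { get; set; }
    public string ModelPath { get; set; } = "";
    public int ContextSize { get; set; }
    public int BatchSize { get; set; }
    public int Threads { get; set; }
    public int GpuLayerCount { get; set; }
    public bool UseMemorymap { get; set; }
    public bool UseMemoryLock { get; set; }
    public int MainGpu { get; set; }
    public int Seed { get; set; }
    public string Encoding { get; set; } = "UTF-8";
    // Remaining keys (LowVram, UseFp16Memory, LoraAdapter, ...) follow the same pattern.
}
```

With this in place, any service can take an `IOptions<LLamaOptions>` dependency instead of reading the configuration keys by hand, and edits to `appsettings.json` flow through without further code changes.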