# Model configuration for aya-expanse:32b, run by the llama-cpp engine.
# BEGIN GENERAL GGUF METADATA
id: aya-expanse:32b  # Model ID, unique between models (author / quantization)
model: aya-expanse:32b  # Model ID used to construct requests; should be unique between models (author / quantization)
name: aya-expanse:32b  # metadata.general.name
version: 1  # metadata.version
# END GENERAL GGUF METADATA
# BEGIN INFERENCE PARAMETERS
# BEGIN REQUIRED
stop:  # tokenizer.ggml.eos_token_id
  # Same token that closes every turn in prompt_template below.
  - '<|END_OF_TURN_TOKEN|>'
# END REQUIRED
# BEGIN OPTIONAL
stream: true  # NOTE(review): the original author was unsure whether true is the default - confirm
top_p: 0.9  # Ranges: 0 to 1
temperature: 0.7  # Ranges: 0 to 1
frequency_penalty: 0  # Ranges: 0 to 1
presence_penalty: 0  # Ranges: 0 to 1
max_tokens: 4096  # Should default to the context length (ctx_len below is also 4096)
# NOTE(review): the remaining sampler settings carry no documentation in this
# file; presumably they are passed through to the engine - confirm their
# meaning and valid ranges against the engine docs before changing them.
seed: -1
dynatemp_range: 0
dynatemp_exponent: 1
top_k: 40
min_p: 0.05
tfs_z: 1
typ_p: 1
repeat_last_n: 64
repeat_penalty: 1
mirostat: false
mirostat_tau: 5
mirostat_eta: 0.100000001
penalize_nl: false
ignore_eos: false
n_probs: 0
min_keep: 0
# END OPTIONAL
# END INFERENCE PARAMETERS
# BEGIN MODEL LOAD PARAMETERS
# BEGIN REQUIRED
engine: llama-cpp  # engine to run model
# {system_message} and {prompt} are the placeholders in this template.
prompt_template: "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system_message}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
# END REQUIRED
# BEGIN OPTIONAL
ctx_len: 4096  # llama.context_length | 0 or undefined = loaded from model
ngl: 65  # presumably the number of model layers offloaded to the GPU - TODO confirm against the engine docs
# END OPTIONAL
# END MODEL LOAD PARAMETERS