1 file changed, +4 −3 lines changed

@@ -264,6 +264,7 @@ extern "C" {
         bool check_tensors; // validate model tensor data
     };

+    // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
     struct llama_context_params {
         uint32_t seed;  // RNG seed, -1 for random
         uint32_t n_ctx; // text context, 0 = from model
@@ -290,14 +291,14 @@ extern "C" {
         ggml_backend_sched_eval_callback cb_eval;
         void * cb_eval_user_data;

-        enum ggml_type type_k; // data type for K cache
-        enum ggml_type type_v; // data type for V cache
+        enum ggml_type type_k; // data type for K cache [EXPERIMENTAL]
+        enum ggml_type type_v; // data type for V cache [EXPERIMENTAL]

         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool logits_all;  // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         bool embeddings;  // if true, extract embeddings (together with logits)
         bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
-        bool flash_attn;  // whether to use flash attention
+        bool flash_attn;  // whether to use flash attention [EXPERIMENTAL]

         // Abort callback
         // if it returns true, execution of llama_decode() will be aborted
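
For reference, a minimal sketch of how client code would opt into the fields flagged here, assuming the llama.cpp C API at this revision (`llama_load_model_from_file` / `llama_new_context_with_model`); the model path is a placeholder, and the specific type choices are illustrative, not a recommendation:

```c
#include "llama.h"

int main(void) {
    struct llama_model_params mparams = llama_model_default_params();
    // "model.gguf" is a placeholder path, not part of this patch.
    struct llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == NULL) {
        return 1;
    }

    struct llama_context_params cparams = llama_context_default_params();
    // Everything below changes a default value, which is exactly what the
    // new NOTE warns about: non-default settings here are [EXPERIMENTAL].
    cparams.flash_attn = true;           // flash attention        [EXPERIMENTAL]
    cparams.type_k     = GGML_TYPE_Q8_0; // quantized K cache      [EXPERIMENTAL]
    cparams.type_v     = GGML_TYPE_Q8_0; // quantized V cache      [EXPERIMENTAL]

    struct llama_context * ctx = llama_new_context_with_model(model, cparams);
    if (ctx == NULL) {
        llama_free_model(model);
        return 1;
    }

    /* ... decode, sample, etc. ... */

    llama_free(ctx);
    llama_free_model(model);
    return 0;
}
```

Left at their defaults, these fields remain the supported configuration; per the new NOTE, the [EXPERIMENTAL] tag covers the behavior when their default values are changed.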