Commit aa0de27

llama : add comments about experimental flags

1 parent b9adcbb

File tree

1 file changed: +5 additions, -3 deletions

llama.h

Lines changed: 5 additions & 3 deletions
@@ -264,6 +264,8 @@ extern "C" {
         bool check_tensors; // validate model tensor data
     };
 
+    // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
+    // https://github.com/ggerganov/llama.cpp/pull/7544
     struct llama_context_params {
         uint32_t seed;  // RNG seed, -1 for random
         uint32_t n_ctx; // text context, 0 = from model
@@ -290,14 +292,14 @@ extern "C" {
         ggml_backend_sched_eval_callback cb_eval;
         void * cb_eval_user_data;
 
-        enum ggml_type type_k; // data type for K cache
-        enum ggml_type type_v; // data type for V cache
+        enum ggml_type type_k; // data type for K cache [EXPERIMENTAL]
+        enum ggml_type type_v; // data type for V cache [EXPERIMENTAL]
 
         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool logits_all;  // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         bool embeddings;  // if true, extract embeddings (together with logits)
         bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
-        bool flash_attn;  // whether to use flash attention
+        bool flash_attn;  // whether to use flash attention [EXPERIMENTAL]
 
         // Abort callback
         // if it returns true, execution of llama_decode() will be aborted
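
For reference, the flags annotated here are fields of llama_context_params, normally set by copying the defaults and overriding individual members. Below is a minimal sketch of opting into the experimental options; the helper name make_experimental_ctx is hypothetical, the model is assumed to have been loaded elsewhere (e.g. via llama_load_model_from_file), and it uses the llama_context_default_params / llama_new_context_with_model API as of this commit:

    #include "llama.h"

    // Hypothetical helper: opts into the [EXPERIMENTAL] fields this commit
    // annotates. Assumes `model` was loaded elsewhere; error handling omitted.
    static struct llama_context * make_experimental_ctx(struct llama_model * model) {
        struct llama_context_params params = llama_context_default_params();

        // Flash attention -- marked [EXPERIMENTAL] above.
        params.flash_attn = true;

        // Quantized KV cache -- also [EXPERIMENTAL]; per the new NOTE,
        // non-default values may crash or produce incorrect results in
        // certain configurations.
        params.type_k = GGML_TYPE_Q8_0; // data type for K cache
        params.type_v = GGML_TYPE_Q8_0; // data type for V cache

        return llama_new_context_with_model(model, params);
    }

This mirrors the warning in the diff: the defaults stay safe, and anyone changing type_k, type_v, or flash_attn is knowingly stepping onto the experimental path.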
