@@ -56,6 +56,15 @@ else()
     set(GGML_NATIVE_DEFAULT ON)
 endif()
 
+# defaults
+if (NOT GGML_LLAMAFILE_DEFAULT)
+    set(GGML_LLAMAFILE_DEFAULT OFF)
+endif()
+
+if (NOT GGML_CUDA_USE_GRAPHS_DEFAULT)
+    set(GGML_CUDA_USE_GRAPHS_DEFAULT OFF)
+endif()
+
 # general
 option(GGML_STATIC "ggml: static link libraries" OFF)
 option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT})
@@ -110,7 +119,7 @@ option(GGML_ACCELERATE "ggml: enable Accelerate framework"
 option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
 set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
     "ggml: BLAS library vendor")
-option(GGML_LLAMAFILE "ggml: use LLAMAFILE" OFF)
+option(GGML_LLAMAFILE "ggml: use LLAMAFILE" ${GGML_LLAMAFILE_DEFAULT})
 
 option(GGML_CUDA "ggml: use CUDA" OFF)
 option(GGML_MUSA "ggml: use MUSA" OFF)
@@ -127,7 +136,7 @@ set(GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
 option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
 option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
 option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
-option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" OFF)
+option(GGML_CUDA_USE_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_USE_GRAPHS_DEFAULT})
 
 option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)
 option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
0 commit comments