1
1
cmake_minimum_required (VERSION 3.13 ) # for add_link_options
2
2
project ("llama.cpp" C CXX )
3
3
4
# ---------------------------------------------------------------------------
# Local development defaults.
#
# Everything in this section is a *default*: each value is stored in the cache
# without FORCE (or guarded by a "not already set" check), so it can still be
# changed from the command line, e.g. `cmake -DLLAMA_AVX2=ON ..`.
# Note: a build tree that already has a cached value keeps that value.
# ---------------------------------------------------------------------------

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Seed for the CUDA flag list that is extended further down in this file.
if (NOT MSVC)
    set(cuda_flags -Wno-pedantic)
endif()

# Features switched on by default for this setup.
foreach (llama_default_on
        LLAMA_CUBLAS
        LLAMA_CUDA_F16
        LLAMA_ACCELERATE
        LLAMA_K_QUANTS
        LLAMA_AVX)
    set(${llama_default_on} ON CACHE BOOL "llama: local default ON (override with -D${llama_default_on}=OFF)")
endforeach()

# Instruction-set extensions switched off by default for this setup
# (equivalent in spirit to configuring with -DLLAMA_NATIVE=off).
foreach (llama_default_off
        LLAMA_AVX2
        LLAMA_AVX512
        LLAMA_FMA
        LLAMA_F16C)
    set(${llama_default_off} OFF CACHE BOOL "llama: local default OFF (override with -D${llama_default_off}=ON)")
endforeach()

# Append instead of overwriting so that user-supplied CUDA flags survive.
string(APPEND CMAKE_CUDA_FLAGS " --verbose")

# needed for f16 CUDA intrinsics
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
    set(CMAKE_CUDA_ARCHITECTURES "60;61;70")
endif()

# Prefer the CUDA 12.3 toolkit when it is installed at its default location,
# but never override an explicit user choice and never point CMake at a
# compiler that does not exist on this machine.
set(llama_local_cuda_root /usr/local/cuda-12.3)
if (NOT DEFINED CMAKE_CUDA_COMPILER AND EXISTS "${llama_local_cuda_root}/bin/nvcc")
    set(CMAKE_CUDA_COMPILER ${llama_local_cuda_root}/bin/nvcc)
    # NOTE(review): CMake itself reads CUDACXX only as an environment variable;
    # this plain variable is kept in case something else in the project uses it.
    set(CUDACXX ${llama_local_cuda_root}/bin/nvcc)
endif()
if (NOT DEFINED CUDA_TOOLKIT_ROOT_DIR AND IS_DIRECTORY "${llama_local_cuda_root}")
    set(CUDA_TOOLKIT_ROOT_DIR ${llama_local_cuda_root})
endif()
#GGML_USE_CUBLAS

#set(CMAKE_EXE_LINKER_FLAGS -pg)
#set(CMAKE_SHARED_LINKER_FLAGS -pg)

# Default to a Debug build, but only when the user did not pick a build type.
# FORCE is needed here solely to replace the empty cache entry that project()
# creates; the guard guarantees that a user-provided value is never overwritten.
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

6
32
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE )
7
33
set (CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE )
8
34
set_property (CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" )
@@ -44,7 +70,7 @@ endif()
44
70
45
71
# general
46
72
option (LLAMA_STATIC "llama: static link libraries" OFF )
47
- option (LLAMA_NATIVE "llama: enable -march=native flag" ON )
73
+ option (LLAMA_NATIVE "llama: enable -march=native flag" OFF )
48
74
option (LLAMA_LTO "llama: enable link time optimization" OFF )
49
75
50
76
# debug
@@ -77,9 +103,9 @@ endif()
77
103
78
104
# 3rd party libs
# User-facing switches for optional third-party backends. Each option() only
# supplies a default; the value can be changed with -D<NAME>=ON/OFF.
# In this revision the defaults of LLAMA_BLAS and LLAMA_CUBLAS are changed
# from OFF to ON.
79
105
option (LLAMA_ACCELERATE "llama: enable Accelerate framework" ON )
80
- option (LLAMA_BLAS "llama: use BLAS" OFF )
106
+ option (LLAMA_BLAS "llama: use BLAS" ON )
81
107
# Free-form string (not a boolean), hence a cache STRING rather than option().
set (LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor" )
82
- option (LLAMA_CUBLAS "llama: use CUDA" OFF )
108
+ option (LLAMA_CUBLAS "llama: use CUDA" ON )
83
109
#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
84
110
option (LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF )
85
111
option (LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF )
@@ -104,7 +130,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example"
104
130
# Compile flags
105
131
#
106
132
107
- set (CMAKE_CXX_STANDARD 11 )
133
+ set (CMAKE_CXX_STANDARD 17 )
108
134
set (CMAKE_CXX_STANDARD_REQUIRED true )
109
135
set (CMAKE_C_STANDARD 11 )
110
136
set (CMAKE_C_STANDARD_REQUIRED true )
@@ -230,7 +256,12 @@ if (LLAMA_BLAS)
230
256
231
257
message (STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS} " )
232
258
add_compile_options (${BLAS_LINKER_FLAGS} )
233
- add_compile_definitions (GGML_USE_OPENBLAS )
259
+
260
# Ask nvcc to keep its intermediate files and collect them in one directory
# inside the build tree (approach taken from https://github.com/NVIDIA/cutlass).
# file(MAKE_DIRECTORY) replaces the deprecated make_directory() command and
# creates the directory at configure time so that --keep-dir has a valid target.
file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/nvcc_tmp")
# The SHELL: prefix keeps "--keep-dir <path>" together as an option/argument
# pair instead of letting CMake de-duplicate or split it.
set(cuda_flags --keep "SHELL:--keep-dir ${PROJECT_BINARY_DIR}/nvcc_tmp" ${cuda_flags})
263
+
264
+ # add_compile_definitions(GGML_USE_OPENBLAS)
234
265
if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel" ))
235
266
add_compile_definitions (GGML_BLAS_USE_MKL )
236
267
endif ()
@@ -272,6 +303,7 @@ if (LLAMA_CUBLAS)
272
303
endif ()
273
304
add_compile_definitions (GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} )
274
305
add_compile_definitions (GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y} )
306
+
275
307
if (DEFINED LLAMA_CUDA_DMMV_Y )
276
308
add_compile_definitions (GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y} ) # for backwards compatibility
277
309
endif ()
@@ -312,7 +344,7 @@ if (LLAMA_MPI)
312
344
if (MPI_C_FOUND )
313
345
message (STATUS "MPI found" )
314
346
set (GGML_HEADERS_MPI ggml-mpi.h )
315
- set (GGML_SOURCES_MPI ggml-mpi.c ggml-mpi.h )
347
+ set (GGML_SOURCES_MPI ggml-mpi.cpp ggml-mpi.h )
316
348
add_compile_definitions (GGML_USE_MPI )
317
349
add_compile_definitions (${MPI_C_COMPILE_DEFINITIONS} )
318
350
if (NOT MSVC )
@@ -390,14 +422,15 @@ endif()
390
422
391
423
if (LLAMA_ALL_WARNINGS )
392
424
if (NOT MSVC )
393
- set (warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function )
425
+ # -Wpedantic
426
+ set (warning_flags -Wall -Wextra -Wcast-qual -Wno-unused-function )
394
427
set (c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration )
395
- set (cxx_flags -Wmissing-declarations -Wmissing-noreturn )
428
+ set (cxx_flags -Wmissing-declarations -Wmissing-noreturn -fpermissive )
396
429
set (host_cxx_flags "" )
397
430
398
431
if (CMAKE_C_COMPILER_ID MATCHES "Clang" )
399
432
set (warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return )
400
- set (host_cxx_flags ${host_cxx_flags} -Wmissing-prototypes -Wextra-semi )
433
+ set (host_cxx_flags ${host_cxx_flags} -Wmissing-prototypes -Wextra-semi -fpermissive )
401
434
402
435
if (
403
436
(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0 ) OR
@@ -407,37 +440,37 @@ if (LLAMA_ALL_WARNINGS)
407
440
endif ()
408
441
elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU" )
409
442
set (c_flags ${c_flags} -Wdouble-promotion )
410
- set (host_cxx_flags ${host_cxx_flags} -Wno-array-bounds )
443
+ set (host_cxx_flags ${host_cxx_flags} -Wno-array-bounds -fpermissive )
411
444
412
445
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0 )
413
- set (host_cxx_flags ${host_cxx_flags} -Wno-format-truncation )
446
+ set (host_cxx_flags ${host_cxx_flags} -Wno-format-truncation -fpermissive )
414
447
endif ()
415
448
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0 )
416
- set (host_cxx_flags ${host_cxx_flags} -Wextra-semi )
449
+ set (host_cxx_flags ${host_cxx_flags} -Wextra-semi -fpermissive )
417
450
endif ()
418
451
endif ()
419
452
else ()
420
453
# todo : msvc
421
454
endif ()
422
455
423
- set (c_flags ${c_flags} ${warning_flags} )
424
- set (cxx_flags ${cxx_flags} ${warning_flags} )
456
+ set (c_flags ${c_flags} -save-temps --verbose ${warning_flags} )
457
+ set (cxx_flags ${cxx_flags} -fpermissive -save-temps --verbose ${warning_flags} )
425
458
add_compile_options ("$<$<COMPILE_LANGUAGE:C>:${c_flags} >"
426
459
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags} >"
427
460
"$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags} >" )
428
461
429
462
endif ()
430
463
431
- if (NOT MSVC )
432
- set (cuda_flags -Wno-pedantic )
433
- endif ()
434
464
# Build the option list handed to the CUDA compiler: the C++ flags collected
# above, fast-math, and whatever cuda_flags already holds from earlier in
# this file.
set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})

list(JOIN host_cxx_flags " " cuda_host_flags)  # pass host compiler flags as a single argument
if (NOT cuda_host_flags STREQUAL "")
    set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
endif()

# Verbose compiler output for every CUDA translation unit.
set(cuda_flags --verbose ${cuda_flags})

add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")

# Device-side debug information (-G) is requested for the Debug configuration
# only, so it no longer leaks into Release/RelWithDebInfo/MinSizeRel builds.
add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<CONFIG:Debug>>:-G>")
442
475
443
476
if (WIN32 )
@@ -485,8 +518,10 @@ if (NOT MSVC)
485
518
add_link_options (-static-libgcc -static-libstdc++ )
486
519
endif ()
487
520
endif ()
521
# Emit a linker map file next to every linked binary. The file name is taken
# from the target being linked; a configure-time ${TARGET} variable is not set
# in the visible top-level scope, which would make every link write ".map".
add_link_options("-Wl,-Map=$<TARGET_PROPERTY:NAME>.map")
522
+
488
523
if (LLAMA_GPROF )
489
- add_compile_options (-pg )
524
+ add_compile_options (-pg )
490
525
endif ()
491
526
endif ()
492
527
@@ -645,13 +680,13 @@ if (GGML_USE_CPU_HBM)
645
680
endif ()
646
681
647
682
add_library (ggml OBJECT
648
- ggml.c
683
+ ggml.cpp
649
684
ggml.h
650
- ggml-alloc.c
685
+ ggml-alloc.cpp
651
686
ggml-alloc.h
652
- ggml-backend.c
687
+ ggml-backend.cpp
653
688
ggml-backend.h
654
- ggml-quants.c
689
+ ggml-quants.cpp
655
690
ggml-quants.h
656
691
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
657
692
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
0 commit comments