Commit ee9b0bc
mike dupont committed:

    rebased and trimmed down
    now compiling again

1 parent 8e672ef, commit ee9b0bc

15 files changed: +2317, -838 lines

CMakeLists.txt: 58 additions & 23 deletions
@@ -1,8 +1,34 @@
 cmake_minimum_required(VERSION 3.13) # for add_link_options
 project("llama.cpp" C CXX)
 
-set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+if (NOT MSVC)
+    set(cuda_flags -Wno-pedantic)
+endif()
 
+set(LLAMA_CUBLAS ON)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+set(LLAMA_CUDA_F16 ON)
+set(LLAMA_ACCELERATE ON)
+set(LLAMA_K_QUANTS ON)
+
+#-DLLAMA_NATIVE=off
+set(LLAMA_AVX ON)
+set(LLAMA_AVX2 OFF)
+set(LLAMA_AVX512 OFF)
+set(LLAMA_FMA OFF)
+set(LLAMA_F16C OFF)
+set(CMAKE_CUDA_FLAGS "--verbose") #
+set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
+set(CUDACXX /usr/local/cuda-12.3/bin/nvcc)
+set(CMAKE_CUDA_COMPILER /usr/local/cuda-12.3/bin/nvcc)
+set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda-12.3)
+#GGML_USE_CUBLAS
+
+#set(CMAKE_EXE_LINKER_FLAGS -pg)
+#set(CMAKE_SHARED_LINKER_FLAGS -pg)
+
+set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE)
+
 if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
     set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
     set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
@@ -44,7 +70,7 @@ endif()
 
 # general
 option(LLAMA_STATIC "llama: static link libraries" OFF)
-option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
+option(LLAMA_NATIVE "llama: enable -march=native flag" OFF)
 option(LLAMA_LTO "llama: enable link time optimization" OFF)
 
 # debug
@@ -77,9 +103,9 @@ endif()
 
 # 3rd party libs
 option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
-option(LLAMA_BLAS "llama: use BLAS" OFF)
+option(LLAMA_BLAS "llama: use BLAS" ON)
 set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
-option(LLAMA_CUBLAS "llama: use CUDA" OFF)
+option(LLAMA_CUBLAS "llama: use CUDA" ON)
 #option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
 option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
 option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
@@ -104,7 +130,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example"
 # Compile flags
 #
 
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED true)
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_C_STANDARD_REQUIRED true)
@@ -230,7 +256,12 @@ if (LLAMA_BLAS)
 
         message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
         add_compile_options(${BLAS_LINKER_FLAGS})
-        add_compile_definitions(GGML_USE_OPENBLAS)
+
+        # from https://github.com/NVIDIA/cutlass
+        make_directory("${PROJECT_BINARY_DIR}/nvcc_tmp")
+        set(cuda_flags --keep "SHELL:--keep-dir ${PROJECT_BINARY_DIR}/nvcc_tmp" ${cuda_flags})
+
+        # add_compile_definitions(GGML_USE_OPENBLAS)
         if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
             add_compile_definitions(GGML_BLAS_USE_MKL)
         endif()
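The `--keep`/`--keep-dir` pair added above asks nvcc to retain its intermediate files (preprocessed source, .ptx, .cubin) in the nvcc_tmp directory for inspection instead of deleting them. As a standalone sketch of the same effect (file names illustrative):

    # keep nvcc intermediates in build/nvcc_tmp so the generated PTX can be examined
    nvcc --keep --keep-dir build/nvcc_tmp -c ggml-cuda.cu -o ggml-cuda.o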
@@ -272,6 +303,7 @@ if (LLAMA_CUBLAS)
     endif()
     add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
     add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
+
     if (DEFINED LLAMA_CUDA_DMMV_Y)
         add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
     endif()
@@ -312,7 +344,7 @@ if (LLAMA_MPI)
     if (MPI_C_FOUND)
         message(STATUS "MPI found")
         set(GGML_HEADERS_MPI ggml-mpi.h)
-        set(GGML_SOURCES_MPI ggml-mpi.c ggml-mpi.h)
+        set(GGML_SOURCES_MPI ggml-mpi.cpp ggml-mpi.h)
         add_compile_definitions(GGML_USE_MPI)
         add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
         if (NOT MSVC)
@@ -390,14 +422,15 @@ endif()
 
 if (LLAMA_ALL_WARNINGS)
     if (NOT MSVC)
-        set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
+        # -Wpedantic
+        set(warning_flags -Wall -Wextra -Wcast-qual -Wno-unused-function)
         set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration)
-        set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
+        set(cxx_flags -Wmissing-declarations -Wmissing-noreturn -fpermissive)
         set(host_cxx_flags "")
 
         if (CMAKE_C_COMPILER_ID MATCHES "Clang")
            set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
-            set(host_cxx_flags ${host_cxx_flags} -Wmissing-prototypes -Wextra-semi)
+            set(host_cxx_flags ${host_cxx_flags} -Wmissing-prototypes -Wextra-semi -fpermissive)
 
            if (
                (CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
@@ -407,37 +440,37 @@ if (LLAMA_ALL_WARNINGS)
            endif()
        elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
            set(c_flags ${c_flags} -Wdouble-promotion)
-            set(host_cxx_flags ${host_cxx_flags} -Wno-array-bounds)
+            set(host_cxx_flags ${host_cxx_flags} -Wno-array-bounds -fpermissive)
 
            if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
-                set(host_cxx_flags ${host_cxx_flags} -Wno-format-truncation)
+                set(host_cxx_flags ${host_cxx_flags} -Wno-format-truncation -fpermissive)
            endif()
            if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
-                set(host_cxx_flags ${host_cxx_flags} -Wextra-semi)
+                set(host_cxx_flags ${host_cxx_flags} -Wextra-semi -fpermissive)
            endif()
        endif()
    else()
        # todo : msvc
    endif()
 
-    set(c_flags ${c_flags} ${warning_flags})
-    set(cxx_flags ${cxx_flags} ${warning_flags})
+    set(c_flags ${c_flags} -save-temps --verbose ${warning_flags})
+    set(cxx_flags ${cxx_flags} -fpermissive -save-temps --verbose ${warning_flags})
    add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
                        "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
                        "$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
 
 endif()
 
-if (NOT MSVC)
-    set(cuda_flags -Wno-pedantic)
-endif()
 set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})
 
 list(JOIN host_cxx_flags " " cuda_host_flags) # pass host compiler flags as a single argument
 if (NOT cuda_host_flags STREQUAL "")
     set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
 endif()
 
+#
+set(cuda_flags --verbose -G ${cuda_flags})
+
 add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")
 
 if (WIN32)
@@ -485,8 +518,10 @@ if (NOT MSVC)
            add_link_options(-static-libgcc -static-libstdc++)
        endif()
    endif()
+    add_link_options("-Wl,-Map=${TARGET}.map")
+
    if (LLAMA_GPROF)
-        add_compile_options(-pg)
+        add_compile_options(-pg)
    endif()
 endif()
 
@@ -645,13 +680,13 @@ if (GGML_USE_CPU_HBM)
 endif()
 
 add_library(ggml OBJECT
-            ggml.c
+            ggml.cpp
             ggml.h
-            ggml-alloc.c
+            ggml-alloc.cpp
             ggml-alloc.h
-            ggml-backend.c
+            ggml-backend.cpp
             ggml-backend.h
-            ggml-quants.c
+            ggml-quants.cpp
             ggml-quants.h
             ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
             ${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
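Taken together, the CMake changes hard-code a CUDA 12.3 debug build. A configure-and-build invocation matching them would look roughly like this (a sketch; the -D flags mirror the options set above, and the CUDA path assumes the install used in this commit):

    # configure with the options this commit forces on, then build
    cmake -B build \
          -DLLAMA_CUBLAS=ON -DLLAMA_BLAS=ON -DLLAMA_NATIVE=OFF \
          -DCMAKE_CUDA_COMPILER=/usr/local/cuda-12.3/bin/nvcc \
          -DCMAKE_BUILD_TYPE=Debug
    cmake --build build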

Makefile: 10 additions & 10 deletions
@@ -116,7 +116,7 @@ endif
 # keep standard at C11 and C++11
 MK_CPPFLAGS = -I. -Icommon
 MK_CFLAGS   = -std=c11 -fPIC
-MK_CXXFLAGS = -std=c++11 -fPIC
+MK_CXXFLAGS = -std=c++17 -fPIC -fpermissive
 
 # -Ofast tends to produce faster code, but may not be available for some compilers.
 ifdef LLAMA_FAST
@@ -502,7 +502,7 @@ ggml-metal.o: ggml-metal.m ggml-metal.h
 endif # LLAMA_METAL
 
 ifdef LLAMA_MPI
-ggml-mpi.o: ggml-mpi.c ggml-mpi.h
+ggml-mpi.o: ggml-mpi.cpp ggml-mpi.h
	$(CC) $(CFLAGS) -c $< -o $@
 endif # LLAMA_MPI
 
@@ -537,17 +537,17 @@ $(info )
 # Build library
 #
 
-ggml.o: ggml.c ggml.h ggml-cuda.h
-	$(CC) $(CFLAGS) -c $< -o $@
+ggml.o: ggml.cpp ggml.h ggml-cuda.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
-	$(CC) $(CFLAGS) -c $< -o $@
+ggml-alloc.o: ggml-alloc.cpp ggml.h ggml-alloc.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
-	$(CC) $(CFLAGS) -c $< -o $@
+ggml-backend.o: ggml-backend.cpp ggml.h ggml-backend.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
-	$(CC) $(CFLAGS) -c $< -o $@
+ggml-quants.o: ggml-quants.cpp ggml.h ggml-quants.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o
 
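With the ggml objects now compiled as C++, each rule above expands to a single $(CXX) invocation. Building one of the renamed units by hand would look roughly like this (a sketch, assuming g++ and the MK_CPPFLAGS/MK_CXXFLAGS values above):

    # equivalent of the ggml.o rule with this commit's flags
    g++ -std=c++17 -fPIC -fpermissive -I. -Icommon -c ggml.cpp -o ggml.o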
README.md: 1 addition & 1 deletion
@@ -696,7 +696,7 @@ PROMPT_TEMPLATE=./prompts/chat-with-bob.txt PROMPT_CACHE_FILE=bob.prompt.bin \
 
 The `grammars/` folder contains a handful of sample grammars. To write your own, check out the [GBNF Guide](./grammars/README.md).
 
-For authoring more complex JSON grammars, you can also check out https://grammar.intrinsiclabs.ai/, a browser app that lets you write TypeScript interfaces which it compiles to GBNF grammars that you can save for local use. Note that the app is built and maintained by members of the community, please file any issues or FRs on [its repo](http://github.com/intrinsiclabsai/gbnfgen) and not this one.
+For authoring more complex JSON grammars, you can also check out https://grammar.intrinsiclabs.ai/, a browser app that lets ygou write TypeScript interfaces which it compiles to GBNF grammars that you can save for local use. Note that the app is built and maintained by members of the community, please file any issues or FRs on [its repo](http://github.com/intrinsiclabsai/gbnfgen) and not this one.
 
 ### Instruction mode with Alpaca
 
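For a flavor of the GBNF format that guide covers, a minimal grammar restricting output to a yes/no answer could look like this (an illustrative sketch, not one of the shipped samples; such a file would be passed to the main example via --grammar-file):

    root ::= "yes" | "no"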