1
1
# Define the default target now so that it is always the first target
BUILD_TARGETS = \
	main quantize quantize-stats perplexity embedding vdot \
	train-text-from-scratch simple server embd-input-test

# Binaries only useful for tests
TEST_TARGETS = \
	tests/test-double-float tests/test-grad0 tests/test-opt \
	tests/test-quantize-fns tests/test-quantize-perf \
	tests/test-sampling tests/test-tokenizer-0

# Default goal: build every program listed in BUILD_TARGETS.
default: $(BUILD_TARGETS)

# `default` is a command name, not a file; keep a stray file called
# "default" from making the goal look up to date.
.PHONY: default
5
8
@@ -60,7 +63,8 @@ ifdef LLAMA_SERVER_VERBOSE
60
63
endif
61
64
62
65
# warnings
# C diagnostics; -Wmissing-prototypes is appended separately so the base
# list stays readable.
CFLAGS   += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith
CFLAGS   += -Wmissing-prototypes
# C++ diagnostics
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
65
69
66
70
# OS specific
@@ -90,6 +94,28 @@ ifeq ($(UNAME_S),Haiku)
90
94
CXXFLAGS += -pthread
91
95
endif
92
96
97
# detect Windows: uname strings containing "_NT" set _WIN32 to 1
ifneq ($(findstring _NT,$(UNAME_S)),)
  _WIN32 := 1
endif

# Platform-dependent naming and link settings, all keyed on _WIN32:
#   LIB_PRE   - library name prefix ("lib"; left unset on Windows)
#   DSO_EXT   - Dynamic Shared Object extension (.so or .dll)
#   LWINSOCK2 - Windows Sockets 2 (Winsock) for network-capable apps
#               (left unset everywhere else)
ifeq ($(_WIN32),1)
  DSO_EXT   := .dll
  LWINSOCK2 := -lws2_32
else
  LIB_PRE := lib
  DSO_EXT := .so
endif
118
+
93
119
ifdef LLAMA_GPROF
94
120
CFLAGS += -pg
95
121
CXXFLAGS += -pg
@@ -102,7 +128,7 @@ endif
102
128
# Architecture specific
103
129
# TODO: probably these flags need to be tweaked on some architectures
104
130
# feel free to update the Makefile for your architecture and send a pull request or issue
105
- ifeq ($(UNAME_M ) ,$(filter $(UNAME_M ) ,x86_64 i686) )
131
+ ifeq ($(UNAME_M ) ,$(filter $(UNAME_M ) ,x86_64 i686 amd64 ) )
106
132
# Use all CPU extensions that are available:
107
133
CFLAGS += -march=native -mtune=native
108
134
CXXFLAGS += -march=native -mtune=native
@@ -168,8 +194,12 @@ ifdef LLAMA_CUBLAS
168
194
# cuBLAS build: define GGML_USE_CUBLAS and add the CUDA include directories.
CXXFLAGS += -DGGML_USE_CUBLAS
CXXFLAGS += -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include

# CUDA runtime libraries first, then the directories to search for them.
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt
LDFLAGS += -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib

OBJS += ggml-cuda.o

# Base flags handed to the CUDA compiler.
NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math

# CUDA compiler binary: LLAMA_CUDA_NVCC when set, plain `nvcc` otherwise.
ifndef LLAMA_CUDA_NVCC
  NVCC = nvcc
else
  NVCC = $(LLAMA_CUDA_NVCC)
endif # LLAMA_CUDA_NVCC
173
203
ifdef CUDA_DOCKER_ARCH
174
204
NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
175
205
else
@@ -190,27 +220,42 @@ else ifdef LLAMA_CUDA_DMMV_Y
190
220
else
191
221
NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
192
222
endif # LLAMA_CUDA_MMV_Y
223
# Either LLAMA_CUDA_F16 or LLAMA_CUDA_DMMV_F16 passes -DGGML_CUDA_F16 to nvcc.
# The `else ifdef` chain adds the define only once when both are set.
ifdef LLAMA_CUDA_F16
  NVCCFLAGS += -DGGML_CUDA_F16
else ifdef LLAMA_CUDA_DMMV_F16
  NVCCFLAGS += -DGGML_CUDA_F16
endif # LLAMA_CUDA_F16 / LLAMA_CUDA_DMMV_F16
196
229
# K_QUANTS_PER_ITERATION: taken from LLAMA_CUDA_KQUANTS_ITER when set, else 2.
ifndef LLAMA_CUDA_KQUANTS_ITER
  NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
else
  NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
endif

# GGML_CUDA_MMQ_Y: taken from LLAMA_CUDA_MMQ_Y when set, else 64.
ifndef LLAMA_CUDA_MMQ_Y
  NVCCFLAGS += -DGGML_CUDA_MMQ_Y=64
else
  NVCCFLAGS += -DGGML_CUDA_MMQ_Y=$(LLAMA_CUDA_MMQ_Y)
endif # LLAMA_CUDA_MMQ_Y

# (disabled)
# ifdef LLAMA_CUDA_CUBLAS
# NVCCFLAGS += -DGGML_CUDA_CUBLAS
# endif # LLAMA_CUDA_CUBLAS

# Optional host compiler for nvcc, forwarded through -ccbin.
ifdef LLAMA_CUDA_CCBIN
  NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
endif
202
245
# Compile the CUDA backend with nvcc. The host C++ flags are reused, except
# that -Ofast is rewritten to -O3 and -Wno-pedantic is appended.
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
	$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) -Wno-pedantic -c $< -o $@
204
247
endif # LLAMA_CUBLAS
205
248
206
249
ifdef LLAMA_CLBLAST
207
- CFLAGS += -DGGML_USE_CLBLAST
208
- CXXFLAGS += -DGGML_USE_CLBLAST
250
+
251
# Query pkg-config once and store the result in a simply-expanded variable,
# so the shell command is not repeated per flag variable (or per expansion,
# should CFLAGS/CXXFLAGS be recursively expanded).
CLBLAST_PKG_CFLAGS := $(shell pkg-config --cflags clblast OpenCL)

CFLAGS   += -DGGML_USE_CLBLAST $(CLBLAST_PKG_CFLAGS)
CXXFLAGS += -DGGML_USE_CLBLAST $(CLBLAST_PKG_CFLAGS)

# Mac provides OpenCL as a framework
ifeq ($(UNAME_S),Darwin)
  LDFLAGS += -lclblast -framework OpenCL
else
  CLBLAST_PKG_LIBS := $(shell pkg-config --libs clblast OpenCL)
  LDFLAGS += $(CLBLAST_PKG_LIBS)
endif
OBJS += ggml-opencl.o
216
261
@@ -284,23 +329,31 @@ $(info )
284
329
# Core C object: compiled from ggml.c; rebuilt when the listed headers change.
ggml.o: ggml.c ggml.h ggml-cuda.h
	$(CC) $(CFLAGS) -c $< -o $@

# Allocator object, also appended to OBJS so targets that list $(OBJS)
# pick it up.
ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
	$(CC) $(CFLAGS) -c $< -o $@

OBJS += ggml-alloc.o

# C++ library object; only the first prerequisite ($<) is compiled, the
# headers are listed for dependency tracking.
llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h llama-util.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
289
339
290
340
# Helper objects shared by the example programs. Each compiles its .cpp
# ($<) and is rebuilt when the matching header changes.
common.o: examples/common.cpp examples/common.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
345
+
293
346
# Shared library built from every prerequisite object ($^).
libllama.so: llama.o ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
295
348
296
349
# Remove build products: objects, shared libraries, example binaries, the
# generated build-info.h and the test binaries.
# The glob patterns must stay single words (*.o, not "* .o"); a bare `*`
# would match every file in the directory.
.PHONY: clean
clean:
	rm -vf *.o *.so *.dll main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server simple vdot train-text-from-scratch embd-input-test build-info.h $(TEST_TARGETS)
298
351
299
352
#
300
353
# Examples
301
354
#
302
355
303
# Link the `main` example. Headers are prerequisites only, so they are
# filtered off the compiler command line.
main: examples/main/main.cpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
	@echo
	@echo ' ==== Run ./main -h for help. ===='
@@ -324,15 +377,15 @@ embedding: examples/embedding/embedding.cpp build-info.h ggml.
324
377
# Link the save-load-state example; headers are dropped from the command line.
save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
326
379
327
# Link the HTTP server example. The .h/.hpp prerequisites (including the
# index.html/index.js/completion.js .hpp files) only trigger rebuilds and are
# filtered off the command line. $(LWINSOCK2) adds the Winsock library on
# Windows and is empty elsewhere.
server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp \
        examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp \
        build-info.h ggml.o llama.o common.o $(OBJS)
	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h %.hpp,$^) -o $@ $(LDFLAGS) $(LWINSOCK2)
329
382
330
# embd-input shared library. The file name is assembled from the platform
# prefix and extension and must be one word with no embedded spaces.
$(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-input/embd-input-lib.cpp build-info.h ggml.o llama.o common.o $(OBJS)
	$(CXX) --shared $(CXXFLAGS) $(filter-out %.h %.hpp,$^) -o $@ $(LDFLAGS)

# Test program for the library above. The shared object is a prerequisite but
# is linked through -L. -lembdinput, so it is filtered out of the inputs
# together with the headers.
embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %$(DSO_EXT) %.h %.hpp,$^) -o $@ $(LDFLAGS) -L. -lembdinput
336
389
337
390
# Link the train-text-from-scratch example; headers are dropped from the
# command line.
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
@@ -349,13 +402,32 @@ build-info.h: $(wildcard .git/index) scripts/build-info.sh
349
402
# Tests
350
403
#
351
404
405
# Build every test binary. The test sources live in a directory that is also
# called `tests`, so the target is declared phony; otherwise make would
# compare against the directory's timestamp.
.PHONY: tests
tests: $(TEST_TARGETS)
406
+
352
407
# Build the matrix-multiplication benchmark, then run it immediately.
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
	./$@
355
410
356
411
# Link the vdot proof-of-concept; it has no header prerequisites, so $^ is
# passed through unfiltered.
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
358
413
359
- .PHONY : tests clean
360
- tests :
361
- bash ./tests/run-tests.sh
414
# One static pattern rule builds every binary in TEST_TARGETS: tests/<name>
# is linked from tests/<name>.cpp plus the shared objects. A test whose
# source is missing fails loudly instead of falling through to an implicit
# rule.
# Headers and .txt files are prerequisites only and are kept off the compiler
# command line, as in the other link rules in this file.
$(TEST_TARGETS): tests/%: tests/%.cpp build-info.h ggml.o llama.o common.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.h %.txt,$^) -o $@ $(LDFLAGS)
0 commit comments