@@ -57,6 +57,8 @@ ifeq ($(UNAME_S),Darwin)
57
57
LLAMA_METAL := 1
58
58
endif
59
59
60
+ LLAMA_NO_OPENMP := 1
61
+
60
62
ifneq ($(UNAME_P),arm)
61
63
SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
62
64
ifeq ($(SYSCTL_M),1)
@@ -67,6 +69,10 @@ ifeq ($(UNAME_S),Darwin)
67
69
endif
68
70
endif
69
71
72
+ ifdef LLAMA_RPC
73
+ BUILD_TARGETS += rpc-server
74
+ endif
75
+
70
76
default : $(BUILD_TARGETS )
71
77
72
78
test : $(TEST_TARGETS )
@@ -135,12 +141,16 @@ MK_NVCCFLAGS = -std=c++11
135
141
ifdef LLAMA_FAST
136
142
MK_CFLAGS += -Ofast
137
143
HOST_CXXFLAGS += -Ofast
144
+ ifndef LLAMA_DEBUG
138
145
MK_NVCCFLAGS += -O3
146
+ endif # LLAMA_DEBUG
139
147
else
140
148
MK_CFLAGS += -O3
141
149
MK_CXXFLAGS += -O3
150
+ ifndef LLAMA_DEBUG
142
151
MK_NVCCFLAGS += -O3
143
- endif
152
+ endif # LLAMA_DEBUG
153
+ endif # LLAMA_FAST
144
154
145
155
ifndef LLAMA_NO_CCACHE
146
156
CCACHE := $(shell which ccache)
@@ -201,9 +211,10 @@ ifdef LLAMA_SCHED_MAX_COPIES
201
211
endif
202
212
203
213
ifdef LLAMA_DEBUG
204
- MK_CFLAGS += -O0 -g
205
- MK_CXXFLAGS += -O0 -g
206
- MK_LDFLAGS += -g
214
+ MK_CFLAGS += -O0 -g
215
+ MK_CXXFLAGS += -O0 -g
216
+ MK_LDFLAGS += -g
217
+ MK_NVCCFLAGS += -O0 -g
207
218
208
219
ifeq ($(UNAME_S),Linux)
209
220
MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
@@ -400,6 +411,12 @@ ifndef LLAMA_NO_ACCELERATE
400
411
endif
401
412
endif # LLAMA_NO_ACCELERATE
402
413
414
+ ifndef LLAMA_NO_OPENMP
415
+ MK_CPPFLAGS += -DGGML_USE_OPENMP
416
+ MK_CFLAGS += -fopenmp
417
+ MK_CXXFLAGS += -fopenmp
418
+ endif # LLAMA_NO_OPENMP
419
+
403
420
ifdef LLAMA_OPENBLAS
404
421
MK_CPPFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags-only-I openblas)
405
422
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
@@ -416,11 +433,25 @@ ifdef LLAMA_BLIS
416
433
MK_LDFLAGS += -lblis -L/usr/local/lib
417
434
endif # LLAMA_BLIS
418
435
436
+ ifdef LLAMA_RPC
437
+ MK_CPPFLAGS += -DGGML_USE_RPC
438
+ OBJS += ggml-rpc.o
439
+ endif # LLAMA_RPC
440
+
419
441
ifdef LLAMA_CUBLAS
420
442
# LLAMA_CUBLAS is deprecated and will be removed in the future
421
443
LLAMA_CUDA := 1
422
444
endif
423
445
446
+ OBJS_CUDA_TEMP_INST = $(patsubst % .cu,% .o,$(wildcard ggml-cuda/template-instances/fattn-wmma* .cu) )
447
+ ifdef LLAMA_CUDA_FA_ALL_QUANTS
448
+ OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*.cu))
449
+ else
450
+ OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
451
+ OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
452
+ OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
453
+ endif # LLAMA_CUDA_FA_ALL_QUANTS
454
+
424
455
ifdef LLAMA_CUDA
425
456
ifneq ('', '$(wildcard /opt/cuda)')
426
457
CUDA_PATH ?= /opt/cuda
@@ -431,6 +462,7 @@ ifdef LLAMA_CUDA
431
462
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
432
463
OBJS += ggml-cuda.o
433
464
OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
465
+ OBJS += $(OBJS_CUDA_TEMP_INST)
434
466
MK_NVCCFLAGS += -use_fast_math
435
467
ifdef LLAMA_FATAL_WARNINGS
436
468
MK_NVCCFLAGS += -Werror all-warnings
@@ -493,7 +525,10 @@ ifdef LLAMA_CUDA_NO_PEER_COPY
493
525
endif # LLAMA_CUDA_NO_PEER_COPY
494
526
ifdef LLAMA_CUDA_CCBIN
495
527
MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
496
- endif
528
+ endif # LLAMA_CUDA_CCBIN
529
+ ifdef LLAMA_CUDA_FA_ALL_QUANTS
530
+ MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
531
+ endif # LLAMA_CUDA_FA_ALL_QUANTS
497
532
498
533
ifdef JETSON_EOL_MODULE_DETECT
499
534
define NVCC_COMPILE
@@ -505,7 +540,7 @@ define NVCC_COMPILE
505
540
endef # NVCC_COMPILE
506
541
endif # JETSON_EOL_MODULE_DETECT
507
542
508
- ggml-cuda/% .o : ggml-cuda/% .cu ggml-cuda/ % .cuh ggml .h ggml-common.h ggml-cuda/common.cuh
543
+ ggml-cuda/% .o : ggml-cuda/% .cu ggml.h ggml-common.h ggml-cuda/common.cuh
509
544
$(NVCC_COMPILE )
510
545
511
546
ggml-cuda.o : ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/* .cuh)
@@ -571,6 +606,7 @@ ifdef LLAMA_HIP_UMA
571
606
MK_CPPFLAGS += -DGGML_HIP_UMA
572
607
endif # LLAMA_HIP_UMA
573
608
MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
609
+ MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
574
610
MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
575
611
HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
576
612
HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
@@ -584,11 +620,12 @@ ifdef LLAMA_CUDA_NO_PEER_COPY
584
620
endif # LLAMA_CUDA_NO_PEER_COPY
585
621
OBJS += ggml-cuda.o
586
622
OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
623
+ OBJS += $(OBJS_CUDA_TEMP_INST)
587
624
588
625
ggml-cuda.o : ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/* .cuh)
589
626
$(HIPCC ) $(CXXFLAGS ) $(HIPFLAGS ) -x hip -c -o $@ $<
590
627
591
- ggml-cuda/% .o : ggml-cuda/% .cu ggml-cuda/ % .cuh ggml .h ggml-common.h ggml-cuda/common.cuh
628
+ ggml-cuda/% .o : ggml-cuda/% .cu ggml.h ggml-common.h ggml-cuda/common.cuh
592
629
$(HIPCC ) $(CXXFLAGS ) $(HIPFLAGS ) -x hip -c -o $@ $<
593
630
594
631
endif # LLAMA_HIPBLAS
@@ -626,11 +663,26 @@ ggml-metal-embed.o: ggml-metal.metal ggml-common.h
626
663
endif
627
664
endif # LLAMA_METAL
628
665
666
+ OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
667
+ COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
668
+ COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o
669
+
629
670
ifndef LLAMA_NO_LLAMAFILE
630
671
sgemm.o : sgemm.cpp sgemm.h ggml.h
631
672
$(CXX ) $(CXXFLAGS ) -c $< -o $@
632
673
endif
633
674
675
+ ifdef LLAMA_RPC
676
+ ggml-rpc.o : ggml-rpc.cpp ggml-rpc.h
677
+ $(CXX ) $(CXXFLAGS ) -c $< -o $@
678
+
679
+ rpc-server.o : examples/rpc/rpc-server.cpp ggml-rpc.h
680
+ $(CXX ) $(CXXFLAGS ) -c $< -o $@
681
+
682
+ rpc-server : rpc-server.o ggml.o llama.o $(COMMON_DEPS ) $(OBJS )
683
+ $(CXX ) $(CXXFLAGS ) $^ -o $@ $(LDFLAGS )
684
+ endif # LLAMA_RPC
685
+
634
686
GF_CC := $(CC )
635
687
include scripts/get-flags.mk
636
688
@@ -710,14 +762,9 @@ unicode.o: unicode.cpp unicode.h
710
762
unicode-data.o : unicode-data.cpp unicode-data.h
711
763
$(CXX ) $(CXXFLAGS ) -c $< -o $@
712
764
713
- OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
714
-
715
765
llama.o : llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
716
766
$(CXX ) $(CXXFLAGS ) -c $< -o $@
717
767
718
- COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
719
- COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o
720
-
721
768
common.o : common/common.cpp $(COMMON_H_DEPS )
722
769
$(CXX ) $(CXXFLAGS ) -c $< -o $@
723
770
@@ -748,6 +795,7 @@ libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)
748
795
clean :
749
796
rm -vrf * .o tests/* .o * .so * .a * .dll benchmark-matmult lookup-create lookup-merge lookup-stats common/build-info.cpp * .dot $(COV_TARGETS ) $(BUILD_TARGETS ) $(TEST_TARGETS )
750
797
rm -vrf ggml-cuda/* .o
798
+ rm -vrf ggml-cuda/template-instances/* .o
751
799
find examples pocs -type f -name " *.o" -delete
752
800
753
801
#
@@ -816,7 +864,7 @@ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(C
816
864
$(CXX ) $(CXXFLAGS ) -c $< -o $(call GET_OBJ_FILE, $< )
817
865
$(CXX ) $(CXXFLAGS ) $(filter-out % .h $< ,$^ ) $(call GET_OBJ_FILE, $< ) -o $@ $(LDFLAGS )
818
866
819
- server : examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index. js.hpp examples/server/completion.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS ) grammar-parser.o $(OBJS )
867
+ server : examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/ index.html.hpp examples/server/index-new.html.hpp examples/server/index. js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats .js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS ) grammar-parser.o $(OBJS )
820
868
$(CXX ) $(CXXFLAGS ) -c $< -o $(call GET_OBJ_FILE, $< )
821
869
$(CXX ) $(CXXFLAGS ) $(filter-out % .h % .hpp $< ,$^ ) -Iexamples/server $(call GET_OBJ_FILE, $< ) -o $@ $(LDFLAGS ) $(LWINSOCK2 )
822
870
0 commit comments