@@ -64,10 +64,14 @@ TEST_TARGETS = \
64
64
tests/test-tokenizer-1-spm
65
65
66
66
# Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned
67
- LEGACY_TARGETS = main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
67
+ LEGACY_TARGETS_CLEAN = main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
68
68
simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \
69
69
retrieval speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm
70
70
71
+ # Legacy build targets that were renamed in #7809, but for which we still want to build binaries that output a deprecation warning if people try to use them.
72
+ # We don't want to clutter things too much, so we only build replacements for the most commonly used binaries.
73
+ LEGACY_TARGETS_BUILD = main quantize perplexity embedding server finetune
74
+
71
75
# Deprecation aliases
72
76
ifdef LLAMA_CUBLAS
73
77
$(error LLAMA_CUBLAS is removed. Use GGML_CUDA instead.)
@@ -193,7 +197,7 @@ ifdef GGML_RPC
193
197
BUILD_TARGETS += rpc-server
194
198
endif
195
199
196
- default : $(BUILD_TARGETS )
200
+ default : $(BUILD_TARGETS ) $( LEGACY_TARGETS_BUILD )
197
201
198
202
test : $(TEST_TARGETS )
199
203
@failures=0; \
@@ -228,7 +232,7 @@ test: $(TEST_TARGETS)
228
232
fi
229
233
@echo ' All tests passed.'
230
234
231
- all : $(BUILD_TARGETS ) $(TEST_TARGETS )
235
+ all : $(BUILD_TARGETS ) $(TEST_TARGETS ) $( LEGACY_TARGETS_BUILD )
232
236
233
237
ifdef RISCV_CROSS_COMPILE
234
238
CC := riscv64-unknown-linux-gnu-gcc
@@ -245,17 +249,22 @@ MK_CFLAGS = -std=c11 -fPIC
245
249
MK_CXXFLAGS = -std=c++11 -fPIC
246
250
MK_NVCCFLAGS = -std=c++11
247
251
248
- ifndef LLAMA_NO_CCACHE
252
+ ifdef LLAMA_NO_CCACHE
253
+ GGML_NO_CCACHE := 1
254
+ DEPRECATE_WARNING := 1
255
+ endif
256
+
257
+ ifndef GGML_NO_CCACHE
249
258
CCACHE := $(shell which ccache)
250
259
ifdef CCACHE
251
260
export CCACHE_SLOPPINESS = time_macros
252
- $(info I ccache found, compilation results will be cached. Disable with LLAMA_NO_CCACHE .)
261
+ $(info I ccache found, compilation results will be cached. Disable with GGML_NO_CCACHE .)
253
262
CC := $(CCACHE ) $(CC )
254
263
CXX := $(CCACHE ) $(CXX )
255
264
else
256
265
$(info I ccache not found. Consider installing it for faster compilation.)
257
266
endif # CCACHE
258
- endif # LLAMA_NO_CCACHE
267
+ endif # GGML_NO_CCACHE
259
268
260
269
# clock_gettime came in POSIX.1b (1993)
261
270
# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
@@ -545,7 +554,7 @@ endif # GGML_BLIS
545
554
546
555
ifndef GGML_NO_LLAMAFILE
547
556
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
548
- OBJ_GGML += ggml/src/sgemm.o
557
+ OBJ_GGML += ggml/src/llamafile/ sgemm.o
549
558
endif
550
559
551
560
ifdef GGML_RPC
@@ -826,7 +835,8 @@ OBJ_GGML += \
826
835
ggml/src/ggml.o \
827
836
ggml/src/ggml-alloc.o \
828
837
ggml/src/ggml-backend.o \
829
- ggml/src/ggml-quants.o
838
+ ggml/src/ggml-quants.o \
839
+ ggml/src/ggml-aarch64.o
830
840
831
841
OBJ_LLAMA = \
832
842
src/llama.o \
@@ -926,6 +936,7 @@ $(info - LLAMA_NO_LLAMAFILE)
926
936
$(info - LLAMA_NO_ACCELERATE)
927
937
$(info - LLAMA_NO_OPENMP)
928
938
$(info - LLAMA_NO_METAL)
939
+ $(info - LLAMA_NO_CCACHE)
929
940
$(info )
930
941
endif
931
942
@@ -959,15 +970,22 @@ ggml/src/ggml-quants.o: \
959
970
ggml/src/ggml-common.h
960
971
$(CC ) $(CFLAGS ) -c $< -o $@
961
972
973
+ ggml/src/ggml-aarch64.o : \
974
+ ggml/src/ggml-aarch64.c \
975
+ ggml/include/ggml.h \
976
+ ggml/src/ggml-aarch64.h \
977
+ ggml/src/ggml-common.h
978
+ $(CC ) $(CFLAGS ) -c $< -o $@
979
+
962
980
ggml/src/ggml-blas.o : \
963
981
ggml/src/ggml-blas.cpp \
964
982
ggml/include/ggml-blas.h
965
983
$(CXX ) $(CXXFLAGS ) -c $< -o $@
966
984
967
985
ifndef GGML_NO_LLAMAFILE
968
- ggml/src/sgemm.o : \
969
- ggml/src/sgemm.cpp \
970
- ggml/src/sgemm.h \
986
+ ggml/src/llamafile/ sgemm.o : \
987
+ ggml/src/llamafile/ sgemm.cpp \
988
+ ggml/src/llamafile/ sgemm.h \
971
989
ggml/include/ggml.h
972
990
$(CXX ) $(CXXFLAGS ) -c $< -o $@
973
991
endif # GGML_NO_LLAMAFILE
@@ -1092,7 +1110,7 @@ clean:
1092
1110
rm -vrf ggml/src/ggml-cuda/template-instances/* .o
1093
1111
rm -rvf $(BUILD_TARGETS )
1094
1112
rm -rvf $(TEST_TARGETS )
1095
- rm -rvf $(LEGACY_TARGETS )
1113
+ rm -rvf $(LEGACY_TARGETS_CLEAN )
1096
1114
find examples pocs -type f -name " *.o" -delete
1097
1115
1098
1116
#
@@ -1488,3 +1506,61 @@ llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \
1488
1506
$(OBJ_GGML )
1489
1507
$(CXX ) $(CXXFLAGS ) -c $< -o $(call GET_OBJ_FILE, $< )
1490
1508
$(CXX ) $(CXXFLAGS ) $(filter-out $< ,$^ ) $(call GET_OBJ_FILE, $< ) -o $@ $(LDFLAGS )
1509
+
1510
+ #
1511
+ # Deprecated binaries that we want to keep around long enough for people to migrate to the new filenames; after that, they can be removed.
1512
+ #
1513
+ # Mark legacy binary targets as .PHONY so that they are always checked.
1514
+ .PHONY : main quantize perplexity embedding server finetune
1515
+
1516
+ # NOTE: We currently will always build the deprecation-warning `main` and `server` binaries to help users migrate.
1517
+ # Eventually we will want to stop building these targets every time.
1518
+ main : examples/deprecation-warning/deprecation-warning.cpp
1519
+ $(CXX ) $(CXXFLAGS ) -c $< -o $(call GET_OBJ_FILE, $< )
1520
+ $(CXX ) $(CXXFLAGS ) $(filter-out $< ,$^ ) $(call GET_OBJ_FILE, $< ) -o $@ $(LDFLAGS )
1521
+ @echo " NOTICE: The 'main' binary is deprecated. Please use 'llama-cli' instead."
1522
+
1523
+ server : examples/deprecation-warning/deprecation-warning.cpp
1524
+ $(CXX ) $(CXXFLAGS ) -c $< -o $(call GET_OBJ_FILE, $< )
1525
+ $(CXX ) $(CXXFLAGS ) $(filter-out % .h $< ,$^ ) $(call GET_OBJ_FILE, $< ) -o $@ $(LDFLAGS )
1526
+ @echo " NOTICE: The 'server' binary is deprecated. Please use 'llama-server' instead."
1527
+
1528
+ quantize : examples/deprecation-warning/deprecation-warning.cpp
1529
+ ifneq (,$(wildcard quantize) )
1530
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1531
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1532
+ @echo "# ########"
1533
+ @echo "WARNING: The 'quantize' binary is deprecated. Please use 'llama-quantize' instead."
1534
+ @echo " Remove the 'quantize' binary to remove this warning."
1535
+ @echo "# ########"
1536
+ endif
1537
+
1538
+ perplexity : examples/deprecation-warning/deprecation-warning.cpp
1539
+ ifneq (,$(wildcard perplexity) )
1540
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1541
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1542
+ @echo "# ########"
1543
+ @echo "WARNING: The 'perplexity' binary is deprecated. Please use 'llama-perplexity' instead."
1544
+ @echo " Remove the 'perplexity' binary to remove this warning."
1545
+ @echo "# ########"
1546
+ endif
1547
+
1548
+ embedding : examples/deprecation-warning/deprecation-warning.cpp
1549
+ ifneq (,$(wildcard embedding) )
1550
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1551
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1552
+ @echo "# ########"
1553
+ @echo "WARNING: The 'embedding' binary is deprecated. Please use 'llama-embedding' instead."
1554
+ @echo " Remove the 'embedding' binary to remove this warning."
1555
+ @echo "# ########"
1556
+ endif
1557
+
1558
+ finetune : examples/deprecation-warning/deprecation-warning.cpp
1559
+ ifneq (,$(wildcard finetune) )
1560
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1561
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1562
+ @echo "# ########"
1563
+ @echo "WARNING: The 'finetune' binary is deprecated. Please use 'llama-finetune' instead."
1564
+ @echo " Remove the 'finetune' binary to remove this warning."
1565
+ @echo "# ########"
1566
+ endif
0 commit comments