Commit 5020781

Merge branch 'master' into python-endpoint
2 parents: 878df64 + 0bf7cf1

37 files changed (+6875, -746 lines)

.devops/full.Dockerfile

Lines changed: 3 additions & 1 deletion

@@ -16,4 +16,6 @@ COPY . .
 
 RUN make
 
-ENTRYPOINT ["/app/.devops/tools.py"]
+ENV LC_ALL=C.utf8
+
+ENTRYPOINT ["/app/.devops/tools.py"]

.devops/main.Dockerfile

Lines changed: 2 additions & 0 deletions

@@ -15,4 +15,6 @@ FROM ubuntu:$UBUNTU_VERSION as runtime
 
 COPY --from=build /app/main /main
 
+ENV LC_ALL=C.utf8
+
 ENTRYPOINT [ "/main" ]
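Both images now set LC_ALL=C.utf8 so that programs inside the container (notably the Python entrypoint of the full image) default to a UTF-8 locale. A quick smoke test, assuming the example image tag llama-full:

# Build the full image and confirm the locale variable is applied
docker build -f .devops/full.Dockerfile -t llama-full .
docker run --rm --entrypoint /bin/sh llama-full -c 'echo $LC_ALL'   # prints C.utf8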

.github/workflows/tidy-post.yml

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 name: clang-tidy review post comments
 
 on:
-  workflow_run:
+  workflow_dispatch:
     workflows: ["clang-tidy-review"]
     types:
       - completed
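The trigger changes from workflow_run (fires when the "clang-tidy-review" workflow completes) to workflow_dispatch (started manually); the nested workflows:/types: keys are leftovers from the old trigger. A manual run could then be started with the GitHub CLI, for example:

# Trigger the workflow by file name (example invocation; requires gh with repo access)
gh workflow run tidy-post.yml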

.gitignore

Lines changed: 3 additions & 0 deletions

@@ -7,6 +7,7 @@
 .envrc
 .swiftpm
 .venv
+.clang-tidy
 .vs/
 .vscode/

@@ -17,6 +18,7 @@ build-release/
 build-static/
 build-cublas/
 build-opencl/
+build-metal/
 build-no-accel/
 build-sanitize-addr/
 build-sanitize-thread/

@@ -33,6 +35,7 @@ models/*
 /benchmark-matmult
 /vdot
 /Pipfile
+/libllama.so
 
 build-info.h
 arm_neon.h
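The new rules can be verified with git's ignore tracer; the paths below are only examples:

# Show which ignore rule matches each path
git check-ignore -v .clang-tidy build-metal/cache.bin libllama.so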

CMakeLists.txt

Lines changed: 54 additions & 15 deletions

@@ -64,13 +64,15 @@ if (NOT MSVC)
 endif()
 
 # 3rd party libs
-option(LLAMA_ACCELERATE             "llama: enable Accelerate framework" ON)
-option(LLAMA_BLAS                   "llama: use BLAS"                    OFF)
+option(LLAMA_ACCELERATE                      "llama: enable Accelerate framework"      ON)
+option(LLAMA_BLAS                            "llama: use BLAS"                         OFF)
 set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
-option(LLAMA_CUBLAS                 "llama: use cuBLAS"                  OFF)
-set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
-set(LLAMA_CUDA_DMMV_Y  "1" CACHE STRING "llama: y block size for dmmv CUDA kernels")
-option(LLAMA_CLBLAST                "llama: use CLBlast"                 OFF)
+option(LLAMA_CUBLAS                          "llama: use cuBLAS"                       OFF)
+set(LLAMA_CUDA_DMMV_X "32" CACHE STRING      "llama: x stride for dmmv CUDA kernels")
+set(LLAMA_CUDA_DMMV_Y  "1" CACHE STRING      "llama: y block size for dmmv CUDA kernels")
+option(LLAMA_CLBLAST                         "llama: use CLBlast"                      OFF)
+option(LLAMA_METAL                           "llama: use Metal"                        OFF)
+option(LLAMA_K_QUANTS                        "llama: use k-quants"                     ON)
 
 option(LLAMA_BUILD_TESTS    "llama: build tests"    ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})

@@ -183,7 +185,7 @@ if (LLAMA_CUBLAS)
 
     enable_language(CUDA)
 
-    set(GGML_CUDA_SOURCES ggml-cuda.cu ggml-cuda.h)
+    set(GGML_SOURCES_CUDA ggml-cuda.cu ggml-cuda.h)
 
     add_compile_definitions(GGML_USE_CUBLAS)
     add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})

@@ -200,12 +202,42 @@ if (LLAMA_CUBLAS)
     endif()
 endif()
 
+if (LLAMA_METAL)
+    find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
+    find_library(METAL_FRAMEWORK    Metal      REQUIRED)
+    find_library(METALKIT_FRAMEWORK MetalKit   REQUIRED)
+    find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED)
+
+    set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h)
+
+    add_compile_definitions(GGML_USE_METAL)
+    add_compile_definitions(GGML_METAL_NDEBUG)
+
+    # get full path to the file
+    #add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
+
+    # copy ggml-metal.metal to bin directory
+    configure_file(ggml-metal.metal bin/ggml-metal.metal COPYONLY)
+
+    set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
+        ${FOUNDATION_LIBRARY}
+        ${METAL_FRAMEWORK}
+        ${METALKIT_FRAMEWORK}
+        ${METALPERFORMANCE_FRAMEWORK}
+    )
+endif()
+
+if (LLAMA_K_QUANTS)
+    set(GGML_SOURCES_EXTRA ${GGML_SOURCES_EXTRA} k_quants.c k_quants.h)
+    add_compile_definitions(GGML_USE_K_QUANTS)
+endif()
+
 if (LLAMA_CLBLAST)
     find_package(CLBlast)
     if (CLBlast_FOUND)
         message(STATUS "CLBlast found")
 
-        set(GGML_OPENCL_SOURCES ggml-opencl.cpp ggml-opencl.h)
+        set(GGML_SOURCES_OPENCL ggml-opencl.cpp ggml-opencl.h)
 
         add_compile_definitions(GGML_USE_CLBLAST)

@@ -370,8 +402,11 @@ endif()
 add_library(ggml OBJECT
             ggml.c
             ggml.h
-            ${GGML_CUDA_SOURCES}
-            ${GGML_OPENCL_SOURCES})
+            ${GGML_SOURCES_CUDA}
+            ${GGML_SOURCES_OPENCL}
+            ${GGML_SOURCES_METAL}
+            ${GGML_SOURCES_EXTRA}
+            )
 
 target_include_directories(ggml PUBLIC .)
 target_compile_features(ggml PUBLIC c_std_11) # don't bump

@@ -384,21 +419,25 @@ endif()
 add_library(llama
             llama.cpp
             llama.h
-            llama-util.h)
+            llama-util.h
+            )
 
 target_include_directories(llama PUBLIC .)
 target_compile_features(llama PUBLIC cxx_std_11) # don't bump
-target_link_libraries(llama PRIVATE ggml ${LLAMA_EXTRA_LIBS})
+target_link_libraries(llama PRIVATE
+    ggml
+    ${LLAMA_EXTRA_LIBS}
+    )
 
 if (BUILD_SHARED_LIBS)
     set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON)
     target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD)
 endif()
 
-if (GGML_CUDA_SOURCES)
+if (GGML_SOURCES_CUDA)
     message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
-    set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES OFF)
-    set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
+    set_property(TARGET ggml  PROPERTY CUDA_ARCHITECTURES OFF)
+    set_property(TARGET ggml  PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
     set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES OFF)
 endif()
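With the new options, a Metal-enabled build could be configured roughly as follows (the build directory name is an example, not part of the commit):

# Configure and build with Metal and k-quants enabled
mkdir -p build-metal && cd build-metal
cmake -DLLAMA_METAL=ON -DLLAMA_K_QUANTS=ON ..
cmake --build . --config Release

Note that configure_file() copies ggml-metal.metal next to the binaries (bin/ggml-metal.metal), so the runtime can locate the Metal kernel source.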

Makefile

Lines changed: 48 additions & 14 deletions

@@ -40,8 +40,11 @@ endif
 #
 
 # keep standard at C11 and C++11
-CFLAGS   = -I.              -O3 -std=c11   -fPIC
-CXXFLAGS = -I. -I./examples -O3 -std=c++11 -fPIC
+# -Ofast tends to produce faster code, but may not be available for some compilers.
+#OPT = -Ofast
+OPT = -O3
+CFLAGS   = -I.              $(OPT) -std=c11   -fPIC
+CXXFLAGS = -I. -I./examples $(OPT) -std=c++11 -fPIC
 LDFLAGS  =
 
 ifdef LLAMA_DEBUG

@@ -105,6 +108,7 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
 	#CFLAGS   += -mfma -mf16c -mavx
 	#CXXFLAGS += -mfma -mf16c -mavx
 endif
+
 ifneq ($(filter ppc64%,$(UNAME_M)),)
 	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
 	ifneq (,$(findstring POWER9,$(POWER9_M)))

@@ -116,26 +120,35 @@ ifneq ($(filter ppc64%,$(UNAME_M)),)
 		CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
 	endif
 endif
+
+ifndef LLAMA_NO_K_QUANTS
+	CFLAGS   += -DGGML_USE_K_QUANTS
+	OBJS     += k_quants.o
+endif
+
 ifndef LLAMA_NO_ACCELERATE
 	# Mac M1 - include Accelerate framework.
 	# `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time).
 	ifeq ($(UNAME_S),Darwin)
 		CFLAGS  += -DGGML_USE_ACCELERATE
 		LDFLAGS += -framework Accelerate
 	endif
-endif
+endif # LLAMA_NO_ACCELERATE
+
 ifdef LLAMA_OPENBLAS
 	CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas -I/usr/include/openblas
 	ifneq ($(shell grep -e "Arch Linux" -e "ID_LIKE=arch" /etc/os-release 2>/dev/null),)
 		LDFLAGS += -lopenblas -lcblas
 	else
 		LDFLAGS += -lopenblas
 	endif
-endif
+endif # LLAMA_OPENBLAS
+
 ifdef LLAMA_BLIS
-	CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
+	CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
 	LDFLAGS += -lblis -L/usr/local/lib
-endif
+endif # LLAMA_BLIS
+
 ifdef LLAMA_CUBLAS
 	CFLAGS   += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
 	CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include

@@ -156,38 +169,59 @@ endif # LLAMA_CUDA_DMMV_Y
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
 	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@
 endif # LLAMA_CUBLAS
+
 ifdef LLAMA_CLBLAST
-	CFLAGS  += -DGGML_USE_CLBLAST
-	CXXFLAGS += -DGGML_USE_CLBLAST
+	CFLAGS   += -DGGML_USE_CLBLAST
+	CXXFLAGS += -DGGML_USE_CLBLAST
 	# Mac provides OpenCL as a framework
 	ifeq ($(UNAME_S),Darwin)
 		LDFLAGS += -lclblast -framework OpenCL
 	else
 		LDFLAGS += -lclblast -lOpenCL
 	endif
 	OBJS    += ggml-opencl.o
+
 ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
-endif
+endif # LLAMA_CLBLAST
+
+ifdef LLAMA_METAL
+	CFLAGS   += -DGGML_USE_METAL -DGGML_METAL_NDEBUG
+	CXXFLAGS += -DGGML_USE_METAL
+	LDFLAGS  += -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
+	OBJS     += ggml-metal.o
+
+ggml-metal.o: ggml-metal.m ggml-metal.h
+	$(CC) $(CFLAGS) -c $< -o $@
+endif # LLAMA_METAL
+
 ifneq ($(filter aarch64%,$(UNAME_M)),)
 	# Apple M1, M2, etc.
 	# Raspberry Pi 3, 4, Zero 2 (64-bit)
 	CFLAGS   += -mcpu=native
 	CXXFLAGS += -mcpu=native
 endif
+
 ifneq ($(filter armv6%,$(UNAME_M)),)
 	# Raspberry Pi 1, Zero
 	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
 endif
+
 ifneq ($(filter armv7%,$(UNAME_M)),)
 	# Raspberry Pi 2
 	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
 endif
+
 ifneq ($(filter armv8%,$(UNAME_M)),)
 	# Raspberry Pi 3, 4, Zero 2 (32-bit)
 	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
 endif
 
+ifndef LLAMA_NO_K_QUANTS
+k_quants.o: k_quants.c k_quants.h
+	$(CC) $(CFLAGS) -c $< -o $@
+endif # LLAMA_NO_K_QUANTS
+
 #
 # Print build information
 #

@@ -226,22 +260,22 @@ clean:
 # Examples
 #
 
-main: examples/main/main.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+main: examples/main/main.cpp                                  build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 	@echo
 	@echo '==== Run ./main -h for help. ===='
 	@echo
 
-quantize: examples/quantize/quantize.cpp build-info.h ggml.o llama.o $(OBJS)
+quantize: examples/quantize/quantize.cpp                      build-info.h ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.h ggml.o llama.o $(OBJS)
+quantize-stats: examples/quantize-stats/quantize-stats.cpp    build-info.h ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+perplexity: examples/perplexity/perplexity.cpp                build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+embedding: examples/embedding/embedding.cpp                   build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS)
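Note: the k_quants.o rule is guarded with ifndef (not ifdef) to match the earlier block that adds k_quants.o to OBJS; both apply only when LLAMA_NO_K_QUANTS is unset. The new switches can be exercised from the command line; the invocations below are examples rather than part of the commit:

# Metal-accelerated build (macOS only)
LLAMA_METAL=1 make

# Opt out of k-quants, or override the optimization level per the new OPT variable
LLAMA_NO_K_QUANTS=1 make
make OPT=-Ofast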
