Skip to content

Commit 49dc902

Browse files
authored
Merge branch 'master' into readme-convert
2 parents 2a710b2 + d40fded commit 49dc902

File tree

20 files changed

+2164
-915
lines changed

20 files changed

+2164
-915
lines changed

.github/workflows/build.yml

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@ on:
88
required: true
99
type: boolean
1010
push:
11+
branches:
12+
- master
1113
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp']
1214
pull_request:
1315
types: [opened, synchronize, edited, reopened, review_requested, ready_for_review]
@@ -18,6 +20,8 @@ env:
1820

1921
jobs:
2022
ubuntu-latest-make:
23+
if: github.event.pull_request.draft == false
24+
2125
runs-on: ubuntu-latest
2226

2327
steps:
@@ -37,6 +41,8 @@ jobs:
3741
make
3842
3943
ubuntu-latest-cmake:
44+
if: github.event.pull_request.draft == false
45+
4046
runs-on: ubuntu-latest
4147

4248
steps:
@@ -65,6 +71,8 @@ jobs:
6571
ctest --verbose
6672
6773
ubuntu-latest-cmake-sanitizer:
74+
if: github.event.pull_request.draft == false
75+
6876
runs-on: ubuntu-latest
6977

7078
continue-on-error: true
@@ -73,7 +81,6 @@ jobs:
7381
matrix:
7482
sanitizer: [ADDRESS, THREAD, UNDEFINED]
7583
build_type: [Debug, Release]
76-
accelerate: [ON, OFF]
7784

7885
steps:
7986
- name: Clone
@@ -91,7 +98,7 @@ jobs:
9198
run: |
9299
mkdir build
93100
cd build
94-
cmake .. -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DLLAMA_ACCELERATE=${{ matrix.accelerate }}
101+
cmake .. -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
95102
cmake --build . --config ${{ matrix.build_type }}
96103
97104
- name: Test
@@ -101,6 +108,8 @@ jobs:
101108
ctest --verbose
102109
103110
macOS-latest-make:
111+
if: github.event.pull_request.draft == false
112+
104113
runs-on: macos-latest
105114

106115
steps:
@@ -119,6 +128,8 @@ jobs:
119128
make
120129
121130
macOS-latest-cmake:
131+
if: github.event.pull_request.draft == false
132+
122133
runs-on: macOS-latest
123134

124135
steps:
@@ -146,6 +157,8 @@ jobs:
146157
ctest --verbose
147158
148159
windows-latest-cmake:
160+
if: github.event.pull_request.draft == false
161+
149162
runs-on: windows-latest
150163

151164
strategy:

.github/workflows/docker.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,8 @@ on:
1818
jobs:
1919
push_to_registry:
2020
name: Push Docker image to Docker Hub
21+
if: github.event.pull_request.draft == false
22+
2123
runs-on: ubuntu-latest
2224
env:
2325
COMMIT_SHA: ${{ github.sha }}

.gitignore

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,15 @@
11
*.o
22
*.a
3+
.DS_Store
4+
.build/
35
.cache/
6+
.direnv/
7+
.envrc
8+
.swiftpm
9+
.venv
410
.vs/
511
.vscode/
6-
.DS_Store
712

8-
.build/
913
build/
1014
build-em/
1115
build-debug/
@@ -24,17 +28,15 @@ models/*
2428
/perplexity
2529
/embedding
2630
/benchmark-q4_0-matmult
31+
/vdot
2732
/Pipfile
2833

2934
arm_neon.h
3035
compile_commands.json
3136

32-
.envrc
33-
.direnv/
34-
35-
.venv
3637
__pycache__
37-
.swiftpm
3838

3939
zig-out/
4040
zig-cache/
41+
42+
ppl-*.txt

CMakeLists.txt

Lines changed: 47 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,7 @@ endif()
6666
# 3rd party libs
6767
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
6868
option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF)
69+
option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
6970

7071
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
7172
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
@@ -109,6 +110,7 @@ if (APPLE AND LLAMA_ACCELERATE)
109110
message(WARNING "Accelerate framework not found")
110111
endif()
111112
endif()
113+
112114
if (LLAMA_OPENBLAS)
113115
if (LLAMA_STATIC)
114116
set(BLA_STATIC ON)
@@ -142,6 +144,30 @@ if (LLAMA_OPENBLAS)
142144
endif()
143145
endif()
144146

147+
if (LLAMA_CUBLAS)
148+
cmake_minimum_required(VERSION 3.17)
149+
150+
find_package(CUDAToolkit)
151+
if (CUDAToolkit_FOUND)
152+
message(STATUS "cuBLAS found")
153+
154+
enable_language(CUDA)
155+
156+
set(GGML_CUDA_SOURCES ggml-cuda.cu ggml-cuda.h)
157+
158+
add_compile_definitions(GGML_USE_CUBLAS)
159+
160+
if (LLAMA_STATIC)
161+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
162+
else()
163+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
164+
endif()
165+
166+
else()
167+
message(WARNING "cuBLAS not found")
168+
endif()
169+
endif()
170+
145171
if (LLAMA_ALL_WARNINGS)
146172
if (NOT MSVC)
147173
set(c_flags
@@ -153,7 +179,6 @@ if (LLAMA_ALL_WARNINGS)
153179
-Wshadow
154180
-Wstrict-prototypes
155181
-Wpointer-arith
156-
-Wno-unused-function
157182
)
158183
set(cxx_flags
159184
-Wall
@@ -221,21 +246,26 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
221246
message(STATUS "x86 detected")
222247
if (MSVC)
223248
if (LLAMA_AVX512)
224-
add_compile_options(/arch:AVX512)
249+
add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX512>)
250+
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)
225251
# MSVC has no compile-time flags enabling specific
226252
# AVX512 extensions, neither it defines the
227253
# macros corresponding to the extensions.
228254
# Do it manually.
229255
if (LLAMA_AVX512_VBMI)
230-
add_compile_definitions(__AVX512VBMI__)
256+
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
257+
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
231258
endif()
232259
if (LLAMA_AVX512_VNNI)
233-
add_compile_definitions(__AVX512VNNI__)
260+
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
261+
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
234262
endif()
235263
elseif (LLAMA_AVX2)
236-
add_compile_options(/arch:AVX2)
264+
add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX2>)
265+
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX2>)
237266
elseif (LLAMA_AVX)
238-
add_compile_options(/arch:AVX)
267+
add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX>)
268+
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>)
239269
endif()
240270
else()
241271
if (LLAMA_F16C)
@@ -272,7 +302,8 @@ endif()
272302

273303
add_library(ggml OBJECT
274304
ggml.c
275-
ggml.h)
305+
ggml.h
306+
${GGML_CUDA_SOURCES})
276307

277308
target_include_directories(ggml PUBLIC .)
278309
target_compile_features(ggml PUBLIC c_std_11) # don't bump
@@ -294,6 +325,14 @@ if (BUILD_SHARED_LIBS)
294325
target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD)
295326
endif()
296327

328+
if (GGML_CUDA_SOURCES)
329+
message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
330+
set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES OFF)
331+
set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
332+
set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES OFF)
333+
endif()
334+
335+
297336
#
298337
# programs, examples and tests
299338
#
@@ -305,4 +344,5 @@ endif ()
305344

306345
if (LLAMA_BUILD_EXAMPLES)
307346
add_subdirectory(examples)
347+
add_subdirectory(pocs)
308348
endif()

Makefile

Lines changed: 21 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,6 @@
1+
# Define the default target now so that it is always the first target
2+
default: main quantize quantize-stats perplexity embedding vdot
3+
14
ifndef UNAME_S
25
UNAME_S := $(shell uname -s)
36
endif
@@ -36,7 +39,7 @@ CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
3639
LDFLAGS =
3740

3841
# warnings
39-
CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function
42+
CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith
4043
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
4144

4245
# OS specific
@@ -97,6 +100,13 @@ ifdef LLAMA_OPENBLAS
97100
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
98101
LDFLAGS += -lopenblas
99102
endif
103+
ifdef LLAMA_CUBLAS
104+
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
105+
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
106+
OBJS += ggml-cuda.o
107+
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
108+
nvcc -arch=native -c -o $@ $<
109+
endif
100110
ifdef LLAMA_GPROF
101111
CFLAGS += -pg
102112
CXXFLAGS += -pg
@@ -133,8 +143,6 @@ $(info I CC: $(CCV))
133143
$(info I CXX: $(CXXV))
134144
$(info )
135145

136-
default: main quantize quantize-stats perplexity embedding
137-
138146
#
139147
# Build library
140148
#
@@ -151,32 +159,35 @@ common.o: examples/common.cpp examples/common.h
151159
clean:
152160
rm -vf *.o main quantize quantize-stats perplexity embedding benchmark-q4_0-matmult
153161

154-
main: examples/main/main.cpp ggml.o llama.o common.o
162+
main: examples/main/main.cpp ggml.o llama.o common.o $(OBJS)
155163
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
156164
@echo
157165
@echo '==== Run ./main -h for help. ===='
158166
@echo
159167

160-
quantize: examples/quantize/quantize.cpp ggml.o llama.o
168+
quantize: examples/quantize/quantize.cpp ggml.o llama.o $(OBJS)
169+
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
170+
171+
quantize-stats: examples/quantize-stats/quantize-stats.cpp ggml.o llama.o $(OBJS)
161172
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
162173

163-
quantize-stats: examples/quantize-stats/quantize-stats.cpp ggml.o llama.o
174+
perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o $(OBJS)
164175
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
165176

166-
perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
177+
embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o $(OBJS)
167178
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
168179

169-
embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o
180+
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
170181
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
171182

172-
libllama.so: llama.o ggml.o
183+
libllama.so: llama.o ggml.o $(OBJS)
173184
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
174185

175186
#
176187
# Tests
177188
#
178189

179-
benchmark: examples/benchmark/benchmark-q4_0-matmult.c ggml.o
190+
benchmark: examples/benchmark/benchmark-q4_0-matmult.c ggml.o $(OBJS)
180191
$(CXX) $(CXXFLAGS) $^ -o benchmark-q4_0-matmult $(LDFLAGS)
181192
./benchmark-q4_0-matmult
182193

0 commit comments

Comments
 (0)