Skip to content

Commit db7e8ce

Browse files
authored
Merge branch 'master' into feat-jina-embeddings
2 parents d6ac931 + c0956b0 commit db7e8ce

File tree

113 files changed

+6265
-10055
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

113 files changed

+6265
-10055
lines changed

.github/workflows/bench.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ on:
3232
- cron: '04 2 * * *'
3333

3434
concurrency:
35-
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}-${{ github.event.inputs.sha }}
35+
group: ${{ github.workflow }}-${{ github.ref || github.run_id }}-${{ github.event.inputs.sha }}
3636
cancel-in-progress: true
3737

3838
jobs:

.github/workflows/build.yml

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ jobs:
3232
- name: Clone
3333
id: checkout
3434
uses: actions/checkout@v4
35+
with:
36+
fetch-depth: 0
3537

3638
- name: Dependencies
3739
id: depends
@@ -52,7 +54,7 @@ jobs:
5254
id: cmake_test
5355
run: |
5456
cd build
55-
ctest -L main --verbose --timeout 900
57+
ctest -L 'main|curl' --verbose --timeout 900
5658
5759
- name: Determine tag name
5860
id: tag
@@ -88,6 +90,8 @@ jobs:
8890
- name: Clone
8991
id: checkout
9092
uses: actions/checkout@v4
93+
with:
94+
fetch-depth: 0
9195

9296
- name: Dependencies
9397
id: depends
@@ -101,7 +105,9 @@ jobs:
101105
sysctl -a
102106
mkdir build
103107
cd build
104-
cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
108+
# Metal is disabled due to intermittent failures with Github runners not having a GPU:
109+
# https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
110+
cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL=OFF -DLLAMA_CURL=ON ..
105111
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
106112
107113
- name: Test
@@ -204,26 +210,28 @@ jobs:
204210
- name: Clone
205211
id: checkout
206212
uses: actions/checkout@v4
213+
with:
214+
fetch-depth: 0
207215

208216
- name: Dependencies
209217
id: depends
210218
run: |
211219
sudo apt-get update
212-
sudo apt-get install build-essential
220+
sudo apt-get install build-essential libcurl4-openssl-dev
213221
214222
- name: Build
215223
id: cmake_build
216224
run: |
217225
mkdir build
218226
cd build
219-
cmake .. -DLLAMA_FATAL_WARNINGS=ON
227+
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON
220228
cmake --build . --config Release -j $(nproc)
221229
222230
- name: Test
223231
id: cmake_test
224232
run: |
225233
cd build
226-
ctest -L main --verbose --timeout 900
234+
ctest -L 'main|curl' --verbose --timeout 900
227235
228236
- name: Test llama2c conversion
229237
id: llama2c_test
@@ -236,6 +244,33 @@ jobs:
236244
./bin/convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
237245
./bin/main -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
238246
247+
- name: Determine tag name
248+
id: tag
249+
shell: bash
250+
run: |
251+
BUILD_NUMBER="$(git rev-list --count HEAD)"
252+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
253+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
254+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
255+
else
256+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
257+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
258+
fi
259+
260+
- name: Pack artifacts
261+
id: pack_artifacts
262+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
263+
run: |
264+
cp LICENSE ./build/bin/
265+
zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
266+
267+
- name: Upload artifacts
268+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
269+
uses: actions/upload-artifact@v4
270+
with:
271+
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
272+
name: llama-bin-ubuntu-x64.zip
273+
239274
# ubuntu-latest-cmake-sanitizer:
240275
# runs-on: ubuntu-latest
241276
#
@@ -938,6 +973,12 @@ jobs:
938973
- name: Download artifacts
939974
id: download-artifact
940975
uses: actions/download-artifact@v4
976+
with:
977+
path: ./artifact
978+
979+
- name: Move artifacts
980+
id: move_artifacts
981+
run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
941982

942983
- name: Create release
943984
id: create_release
@@ -956,15 +997,15 @@ jobs:
956997
const path = require('path');
957998
const fs = require('fs');
958999
const release_id = '${{ steps.create_release.outputs.id }}';
959-
for (let file of await fs.readdirSync('./artifact')) {
1000+
for (let file of await fs.readdirSync('./artifact/release')) {
9601001
if (path.extname(file) === '.zip') {
9611002
console.log('uploadReleaseAsset', file);
9621003
await github.repos.uploadReleaseAsset({
9631004
owner: context.repo.owner,
9641005
repo: context.repo.repo,
9651006
release_id: release_id,
9661007
name: file,
967-
data: await fs.readFileSync(`./artifact/${file}`)
1008+
data: await fs.readFileSync(`./artifact/release/${file}`)
9681009
});
9691010
}
9701011
}

.github/workflows/docker.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,14 +91,20 @@ jobs:
9191
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
9292
fi
9393
94+
- name: Downcase github.repository_owner
95+
run: |
96+
echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
97+
env:
98+
GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
99+
94100
- name: Build and push Docker image (versioned)
95101
if: github.event_name == 'push'
96102
uses: docker/build-push-action@v4
97103
with:
98104
context: .
99105
push: true
100106
platforms: ${{ matrix.config.platforms }}
101-
tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
107+
tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
102108
file: ${{ matrix.config.dockerfile }}
103109

104110
- name: Build and push Docker image (tagged)
@@ -107,5 +113,5 @@ jobs:
107113
context: .
108114
push: ${{ github.event_name == 'push' }}
109115
platforms: ${{ matrix.config.platforms }}
110-
tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
116+
tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
111117
file: ${{ matrix.config.dockerfile }}

.github/workflows/server.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ on:
2323
- cron: '2 4 * * *'
2424

2525
concurrency:
26-
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
26+
group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
2727
cancel-in-progress: true
2828

2929
jobs:

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ lcov-report/
3434
gcovr-report/
3535

3636
build*
37+
!build.zig
3738
cmake-build-*
3839
out/
3940
tmp/
@@ -48,6 +49,7 @@ models-mnt
4849
/convert-llama2c-to-ggml
4950
/embd-input-test
5051
/embedding
52+
/eval-callback
5153
/gguf
5254
/gguf-llama-simple
5355
/gguf-split
@@ -99,6 +101,9 @@ qnt-*.txt
99101
perf-*.txt
100102

101103
examples/jeopardy/results.txt
104+
examples/server/*.html.hpp
105+
examples/server/*.js.hpp
106+
examples/server/*.mjs.hpp
102107

103108
poetry.lock
104109
poetry.toml

CMakeLists.txt

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,18 @@ else()
4343
set(LLAMA_METAL_DEFAULT OFF)
4444
endif()
4545

46+
# TODO: fix this for Android CI
47+
# https://github.com/ggerganov/llama.cpp/pull/6716#issuecomment-2061509191
48+
#if (CMAKE_SYSTEM_NAME MATCHES "ANDROID")
49+
# set(LLAMA_LLAMAFILE_DEFAULT OFF)
50+
#else()
51+
# set(LLAMA_LLAMAFILE_DEFAULT ON)
52+
#endif()
53+
54+
# TODO: temporary disable until MoE is fixed
55+
# https://github.com/ggerganov/llama.cpp/pull/6716
56+
set(LLAMA_LLAMAFILE_DEFAULT OFF)
57+
4658
# general
4759
option(BUILD_SHARED_LIBS "build shared libraries" OFF)
4860
option(LLAMA_STATIC "llama: static link libraries" OFF)
@@ -88,6 +100,7 @@ endif()
88100
# 3rd party libs
89101
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
90102
option(LLAMA_BLAS "llama: use BLAS" OFF)
103+
option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ${LLAMA_LLAMAFILE_DEFAULT})
91104
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
92105
option(LLAMA_CUDA "llama: use CUDA" OFF)
93106
option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
@@ -286,6 +299,7 @@ if (LLAMA_METAL)
286299
${METALKIT_FRAMEWORK}
287300
)
288301
endif()
302+
289303
if (LLAMA_BLAS)
290304
if (LLAMA_STATIC)
291305
set(BLA_STATIC ON)
@@ -368,6 +382,13 @@ if (LLAMA_BLAS)
368382
endif()
369383
endif()
370384

385+
if (LLAMA_LLAMAFILE)
386+
add_compile_definitions(GGML_USE_LLAMAFILE)
387+
388+
set(GGML_HEADERS_LLAMAFILE sgemm.h)
389+
set(GGML_SOURCES_LLAMAFILE sgemm.cpp)
390+
endif()
391+
371392
if (LLAMA_QKK_64)
372393
add_compile_definitions(GGML_QKK_64)
373394
endif()
@@ -1151,15 +1172,16 @@ add_library(ggml OBJECT
11511172
ggml-backend.h
11521173
ggml-quants.c
11531174
ggml-quants.h
1154-
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1155-
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
1156-
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
1157-
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
1158-
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
1159-
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1160-
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
1161-
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
1162-
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
1175+
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1176+
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
1177+
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
1178+
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
1179+
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
1180+
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1181+
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
1182+
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
1183+
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
1184+
${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
11631185
)
11641186

11651187
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})

Makefile

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Define the default target now so that it is always the first target
22
BUILD_TARGETS = \
33
main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
4-
simple batched batched-bench save-load-state server gguf gguf-split llama-bench libllava.a llava-cli baby-llama beam-search \
4+
simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama beam-search \
55
retrieval speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm tests/test-c.o
66

77
# Binaries only useful for tests
@@ -384,6 +384,15 @@ ifdef LLAMA_OPENBLAS
384384
MK_LDFLAGS += $(shell pkg-config --libs openblas)
385385
endif # LLAMA_OPENBLAS
386386

387+
# TODO: temporary disable until MoE is fixed
388+
# https://github.com/ggerganov/llama.cpp/pull/6716
389+
LLAMA_NO_LLAMAFILE := 1
390+
391+
ifndef LLAMA_NO_LLAMAFILE
392+
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
393+
OBJS += sgemm.o
394+
endif
395+
387396
ifdef LLAMA_BLIS
388397
MK_CPPFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
389398
MK_LDFLAGS += -lblis -L/usr/local/lib
@@ -480,11 +489,9 @@ ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/com
480489

481490
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
482491
$(NVCC_COMPILE)
483-
484492
endif # LLAMA_CUDA
485493

486494
ifdef LLAMA_CLBLAST
487-
488495
MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
489496
MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
490497
MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
@@ -603,6 +610,11 @@ ggml-mpi.o: ggml-mpi.c ggml-mpi.h
603610
$(CC) $(CFLAGS) -c $< -o $@
604611
endif # LLAMA_MPI
605612

613+
ifndef LLAMA_NO_LLAMAFILE
614+
sgemm.o: sgemm.cpp sgemm.h ggml.h
615+
$(CXX) $(CXXFLAGS) -c $< -o $@
616+
endif
617+
606618
GF_CC := $(CC)
607619
include scripts/get-flags.mk
608620

@@ -646,7 +658,7 @@ CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])'
646658
ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
647659
ifndef CUDA_DOCKER_ARCH
648660
ifndef CUDA_POWER_ARCH
649-
$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via CUDA_DOCKER_ARCH)
661+
$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
650662
endif # CUDA_POWER_ARCH
651663
endif # CUDA_DOCKER_ARCH
652664
endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
@@ -687,8 +699,8 @@ OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
687699
llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
688700
$(CXX) $(CXXFLAGS) -c $< -o $@
689701

690-
COMMON_H_DEPS = common/common.h common/sampling.h common/log.h
691-
COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o
702+
COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
703+
COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o
692704

693705
common.o: common/common.cpp $(COMMON_H_DEPS)
694706
$(CXX) $(CXXFLAGS) -c $< -o $@
@@ -756,7 +768,7 @@ batched: examples/batched/batched.cpp ggml.o llama.o $(C
756768
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
757769
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
758770

759-
batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o common.o $(OBJS)
771+
batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
760772
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
761773
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
762774

@@ -788,10 +800,19 @@ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(C
788800
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
789801
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
790802

791-
server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp json-schema-to-grammar.o common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
803+
server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
792804
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
793805
$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
794806

807+
# Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`:
808+
examples/server/%.hpp: examples/server/public/% Makefile
809+
@( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \
810+
echo "unsigned char $${NAME}[] = {" && \
811+
cat $< | od -v -t x1 -An | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \
812+
echo "};" && \
813+
echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \
814+
) > $@
815+
795816
gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
796817
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
797818
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
@@ -800,6 +821,10 @@ gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(O
800821
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
801822
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
802823

824+
eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
825+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
826+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
827+
803828
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
804829
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
805830
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

0 commit comments

Comments
 (0)