
Commit 85e51b0

Merge branch 'layla-build' of https://github.com/l3utterfly/llama.cpp into layla-build

2 parents: f3fea11 + 684fea9


64 files changed: +6267 −1729 lines

.devops/nix/docker.nix

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+{
+  lib,
+  dockerTools,
+  buildEnv,
+  llama-cpp,
+  interactive ? true,
+  coreutils,
+}:
+
+# A tar that can be fed into `docker load`:
+#
+# $ nix build .#llamaPackages.docker
+# $ docker load < result
+
+# For details and variations cf.
+# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
+# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
+# - https://nixery.dev/
+
+# Approximate (compressed) sizes, at the time of writing, are:
+#
+# .#llamaPackages.docker: 125M;
+# .#llamaPackagesCuda.docker: 537M;
+# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.
+
+dockerTools.buildLayeredImage {
+  name = llama-cpp.pname;
+  tag = "latest";
+
+  contents =
+    [ llama-cpp ]
+    ++ lib.optionals interactive [
+      coreutils
+      dockerTools.binSh
+      dockerTools.caCertificates
+    ];
+}

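Following the comments at the top of docker.nix, a minimal local workflow could look like the sketch below (not part of the commit; it assumes `llama-cpp.pname` evaluates to "llama-cpp", so the loaded image is tagged llama-cpp:latest):

  $ nix build .#llamaPackages.docker
  $ docker load < result
  $ docker run --rm -it llama-cpp:latest sh   # the default interactive variant ships binSh and coreutils
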
.devops/nix/scope.nix

Lines changed: 3 additions & 0 deletions
@@ -12,5 +12,8 @@ lib.makeScope newScope (
   self: {
     inherit llamaVersion;
     llama-cpp = self.callPackage ./package.nix { };
+    docker = self.callPackage ./docker.nix { };
+    docker-min = self.callPackage ./docker.nix { interactive = false; };
+    sif = self.callPackage ./sif.nix { };
   }
 )

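With these attributes in scope, each new artifact can be built by name. A sketch, assuming the attribute paths follow the `.#llamaPackages.docker` pattern documented in docker.nix:

  $ nix build .#llamaPackages.docker-min   # non-interactive image without coreutils, binSh, or CA certificates
  $ nix build .#llamaPackages.sif          # Singularity/Apptainer image (see sif.nix below)
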
.devops/nix/sif.nix

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+{
+  lib,
+  singularity-tools,
+  llama-cpp,
+  bashInteractive,
+  interactive ? false,
+}:
+
+let
+  optionalInt = cond: x: if cond then x else 0;
+in
+singularity-tools.buildImage rec {
+  inherit (llama-cpp) name;
+  contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];
+
+  # These are excessive (but safe) for most variants. Building singularity
+  # images requires superuser privileges, so we build them inside a VM in a
+  # writable image of pre-determined size.
+  #
+  # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
+  #
+  # Expected image sizes:
+  # - cpu/blas: 150M,
+  # - cuda, all gencodes: 560M,
+  diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
+  memSize = diskSize;
+}

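Once built, the resulting image is meant to be consumed by Singularity/Apptainer. A hypothetical usage sketch, assuming the derivation's output is the image file itself (the exact output layout depends on the nixpkgs `singularity-tools` version):

  $ apptainer run ./result    # or: singularity run ./result
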
.github/ISSUE_TEMPLATE/bug.md

Lines changed: 2 additions & 0 deletions
@@ -7,3 +7,5 @@ assignees: ''
 ---
 
 Please include information about your system, the steps to reproduce the bug, and the version of llama.cpp that you are using. If possible, please provide a minimal code example that reproduces the bug.
+
+If the bug concerns the server, please try to reproduce it first using the [server test scenario framework](https://github.com/ggerganov/llama.cpp/tree/master/examples/server/tests).

.github/workflows/build.yml

Lines changed: 1 addition & 2 deletions
@@ -669,8 +669,7 @@ jobs:
         run: |
           cd examples/llama.android
 
-          # Skip armeabi-v7a for now (https://github.com/llvm/llvm-project/issues/65820).
-          ./gradlew build --no-daemon -Pskip-armeabi-v7a
+          ./gradlew build --no-daemon
 
 #  freeBSD-latest:
 #    runs-on: macos-12

.github/workflows/nix-ci-aarch64.yml

Lines changed: 3 additions & 4 deletions
@@ -19,7 +19,6 @@ on:
 
 jobs:
   nix-build-aarch64:
-    if: ${{ vars.CACHIX_NAME != '' }}
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
@@ -37,16 +36,16 @@ jobs:
           extra-conf: |
             extra-platforms = aarch64-linux
            extra-system-features = nixos-test kvm
-            extra-substituters = https://${{ vars.CACHIX_NAME }}.cachix.org https://cuda-maintainers.cachix.org
-            extra-trusted-public-keys = ${{ vars.CACHIX_PUBLIC_KEY }} cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
+            extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
+            extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
       - uses: DeterminateSystems/magic-nix-cache-action@v2
         with:
           upstream-cache: https://${{ matrix.cachixName }}.cachix.org
       - name: Set-up cachix to push the results to
         uses: cachix/cachix-action@v13
         with:
           authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
-          name: ${{ vars.CACHIX_NAME }}
+          name: llama-cpp
       - name: Show all output paths
         run: >
           nix run github:nix-community/nix-eval-jobs

.github/workflows/nix-ci.yml

Lines changed: 5 additions & 6 deletions
@@ -23,8 +23,8 @@ jobs:
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           extra-conf: |
-            extra-substituters = https://${{ vars.CACHIX_NAME }}.cachix.org https://cuda-maintainers.cachix.org
-            extra-trusted-public-keys = ${{ vars.CACHIX_PUBLIC_KEY }} cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
+            extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
+            extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
       - uses: DeterminateSystems/magic-nix-cache-action@v2
         with:
           upstream-cache: https://${{ matrix.cachixName }}.cachix.org
@@ -37,7 +37,6 @@ jobs:
           --flake
           ".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)"
   nix-build:
-    if: ${{ vars.CACHIX_NAME != '' }}
     strategy:
       fail-fast: false
       matrix:
@@ -51,16 +50,16 @@ jobs:
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           extra-conf: |
-            extra-substituters = https://${{ vars.CACHIX_NAME }}.cachix.org https://cuda-maintainers.cachix.org
-            extra-trusted-public-keys = ${{ vars.CACHIX_PUBLIC_KEY }} cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
+            extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
+            extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
       - uses: DeterminateSystems/magic-nix-cache-action@v2
         with:
           upstream-cache: https://${{ matrix.cachixName }}.cachix.org
       - name: Set-up cachix to push the results to
         uses: cachix/cachix-action@v13
         with:
           authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
-          name: ${{ vars.CACHIX_NAME }}
+          name: llama-cpp
       - name: Build
         run: >
           nix run github:Mic92/nix-fast-build

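For local builds that want to reuse the same binary cache the CI now pushes to, the two settings hard-coded above can be added to nix.conf (a sketch; the values are taken verbatim from this workflow, and using them requires being a trusted user or editing the system-wide config):

  extra-substituters = https://llama-cpp.cachix.org
  extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc=
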
.github/workflows/server.yml

Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
+# Server build and tests
+name: Server
+
+on:
+  workflow_dispatch: # allows manual triggering
+  push:
+    branches:
+      - master
+      - test/server-add-ci-test # FIXME remove
+    paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
+
+jobs:
+  server:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        build: [noavx, avx2, avx, avx512, cublas, clblast, openblas, kompute, vulkan]
+        sanitizer: [ADDRESS, THREAD, UNDEFINED]
+        build_type: [Debug, Release]
+        include:
+          - build: 'noavx'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
+            image: ubuntu:latest
+          - build: 'avx2'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
+            image: ubuntu:latest
+          - build: 'avx'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF'
+            image: ubuntu:latest
+          - build: 'avx512'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON'
+            image: ubuntu:latest
+            experimental: true
+          - build: 'cublas'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON'
+            image: nvidia/cuda:12.3.1-devel-ubuntu22.04
+            arch_not_available: true # require nvidia docker engine
+          - build: 'clblast'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON'
+            image: ubuntu:latest
+            arch_not_available: true
+          - build: 'openblas'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS'
+            image: ubuntu:latest
+          - build: 'kompute'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
+            image: ubuntu:latest
+            arch_not_available: true
+          - build: 'vulkan'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON'
+            image: ubuntu:latest
+            arch_not_available: true
+
+    container:
+      image: ${{ matrix.image }}
+      ports:
+        - 8888
+      options: --cpus 4
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+
+      - name: Dependencies
+        id: depends
+        run: |
+          apt-get update
+          apt-get -y install \
+            build-essential \
+            pkg-config \
+            git \
+            cmake \
+            python3-pip \
+            wget \
+            psmisc
+
+      - name: Download CLBlast
+        id: get_clblast
+        if: ${{ matrix.build == 'clblast' }}
+        run: |
+          apt install -y libclblast-dev
+
+      - name: Download OpenBLAS
+        id: get_openblas
+        if: ${{ matrix.build == 'openblas' }}
+        run: |
+          apt-get -y install libopenblas-dev
+
+      - name: Install Vulkan SDK
+        id: get_vulkan
+        if: ${{ matrix.build == 'kompute' || matrix.build == 'vulkan' }}
+        run: |
+          wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | tee /etc/apt/trusted.gpg.d/lunarg.asc
+          wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
+          apt-get update
+          apt-get -y install vulkan-sdk
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake .. -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ${{ matrix.defines }}
+          cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      - name: Download models
+        id: download_models
+        run: |
+          cd examples/server/tests
+          ../../../scripts/hf.sh --repo ggml-org/models --file tinyllamas/stories260K.gguf
+
+      - name: Tests
+        id: server_integration_test
+        continue-on-error: ${{ matrix.experimental || matrix.arch_not_available }}
+        run: |
+          cd examples/server/tests
+          PORT=8888 ./tests.sh

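The same build-and-test sequence can be reproduced outside CI. A minimal sketch mirroring the workflow steps above (paths, the model file, and the PORT variable are taken directly from this diff; the CMake flags shown are the plain AVX2 configuration):

  # build only the server target
  mkdir build && cd build
  cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release
  cmake --build . -j $(nproc) --target server
  cd ..

  # install the Python test dependencies, fetch the tiny test model, and run the scenarios
  pip install -r examples/server/tests/requirements.txt
  cd examples/server/tests
  ../../../scripts/hf.sh --repo ggml-org/models --file tinyllamas/stories260K.gguf
  PORT=8888 ./tests.sh
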
CMakeLists.txt

Lines changed: 8 additions & 2 deletions
@@ -941,10 +941,16 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STR
         list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
     endif()
     if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
-        # Raspberry Pi 2
-        list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
+        if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
+            # Android armeabi-v7a
+            list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
+        else()
+            # Raspberry Pi 2
+            list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
+        endif()
     endif()
     if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
+        # Android arm64-v8a
         # Raspberry Pi 3, 4, Zero 2 (32-bit)
         list(APPEND ARCH_FLAGS -mno-unaligned-access)
     endif()

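The new armv7 branch is taken when CMAKE_SYSTEM_NAME is "Android", which is what the NDK's CMake toolchain file sets. A hypothetical configure invocation for a 32-bit ARM Android build (the toolchain path, ABI, and platform values are standard NDK conventions rather than part of this commit):

  $ cmake .. -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
             -DANDROID_ABI=armeabi-v7a -DANDROID_PLATFORM=android-23 -DLLAMA_NATIVE=OFF
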
Makefile

Lines changed: 3 additions & 3 deletions
@@ -173,7 +173,7 @@ ifdef LLAMA_DEBUG
 MK_LDFLAGS  += -g
 
 ifeq ($(UNAME_S),Linux)
-	MK_CXXFLAGS += -Wp,-D_GLIBCXX_ASSERTIONS
+	MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
 endif
 else
 MK_CPPFLAGS += -DNDEBUG
@@ -597,7 +597,7 @@ $(info I CC: $(shell $(CC) --version | head -n 1))
 $(info I CXX: $(shell $(CXX) --version | head -n 1))
 ifdef LLAMA_CUBLAS
 $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
-CUDA_VERSION := $(shell nvcc --version | grep -oP 'release (\K[0-9]+\.[0-9])')
+CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
 ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
 ifndef CUDA_DOCKER_ARCH
 ifndef CUDA_POWER_ARCH
@@ -719,7 +719,7 @@ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(C
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
-server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
+server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h examples/llava/llava.h examples/llava/llava.cpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
 	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h %.hpp $< examples/llava/clip.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) -o $@ $(LDFLAGS) $(LWINSOCK2)

README.md

Lines changed: 9 additions & 7 deletions
@@ -10,13 +10,9 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
 
 ### Hot topics
 
-- Remove LLAMA_MAX_DEVICES and LLAMA_SUPPORTS_GPU_OFFLOAD: https://github.com/ggerganov/llama.cpp/pull/5240
-- Incoming backends: https://github.com/ggerganov/llama.cpp/discussions/5138
-- [SYCL backend](README-sycl.md) is ready (1/28/2024), support Linux/Windows in Intel GPUs (iGPU, Arc/Flex/Max series)
-- New SOTA quantized models, including pure 2-bits: https://huggingface.co/ikawrakow
-- Collecting Apple Silicon performance stats:
-  - M-series: https://github.com/ggerganov/llama.cpp/discussions/4167
-  - A-series: https://github.com/ggerganov/llama.cpp/discussions/4508
+- Support for chat templates: [Uncyclo (contributions welcome)](https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template)
+- Support for Gemma models: https://github.com/ggerganov/llama.cpp/pull/5631
+- Non-linear quantization IQ4_NL: https://github.com/ggerganov/llama.cpp/pull/5590
 - Looking for contributions to improve and maintain the `server` example: https://github.com/ggerganov/llama.cpp/issues/4216
 
 ----
@@ -107,6 +103,7 @@ Typically finetunes of the base models below are supported as well.
 - [x] [Orion 14B](https://github.com/ggerganov/llama.cpp/pull/5118)
 - [x] [InternLM2](https://huggingface.co/models?search=internlm2)
 - [x] [CodeShell](https://github.com/WisdomShell/codeshell)
+- [x] [Gemma](https://ai.google.dev/gemma)
 
 **Multimodal models:**
 
@@ -117,6 +114,9 @@ Typically finetunes of the base models below are supported as well.
 - [x] [MobileVLM 1.7B/3B models](https://huggingface.co/models?search=mobileVLM)
 - [x] [Yi-VL](https://huggingface.co/models?search=Yi-VL)
 
+**HTTP server**
+
+[llama.cpp web server](./examples/server) is a lightweight [OpenAI API](https://github.com/openai/openai-openapi) compatible HTTP server that can be used to serve local models and easily connect them to existing clients.
 
 **Bindings:**
 
@@ -145,6 +145,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 - [nat/openplayground](https://github.com/nat/openplayground)
 - [Faraday](https://faraday.dev/) (proprietary)
 - [LMStudio](https://lmstudio.ai/) (proprietary)
+- [LocalAI](https://github.com/mudler/LocalAI) (MIT)
 - [LostRuins/koboldcpp](https://github.com/LostRuins/koboldcpp) (AGPL)
 - [Mozilla-Ocho/llamafile](https://github.com/Mozilla-Ocho/llamafile)
 - [nomic-ai/gpt4all](https://github.com/nomic-ai/gpt4all)
@@ -157,6 +158,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 - [semperai/amica](https://github.com/semperai/amica)
 - [withcatai/catai](https://github.com/withcatai/catai)
 - [Mobile-Artificial-Intelligence/maid](https://github.com/Mobile-Artificial-Intelligence/maid) (MIT)
+- [Msty](https://msty.app) (proprietary)
 
 ---

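Since the README now points to the OpenAI-compatible web server, a quick smoke test could look like this (a sketch, assuming the server was started on its default port 8080 and exposes the OpenAI-style chat completions route):

  $ curl http://localhost:8080/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"messages": [{"role": "user", "content": "Hello!"}]}'
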
build.zig

Lines changed: 2 additions & 1 deletion
@@ -123,6 +123,7 @@ pub fn build(b: *std.build.Builder) !void {
     const grammar_parser = make.obj("grammar-parser", "common/grammar-parser.cpp");
     const train = make.obj("train", "common/train.cpp");
     const clip = make.obj("clip", "examples/llava/clip.cpp");
+    const llava = make.obj("llava", "examples/llava/llava.cpp");
 
     _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, console, grammar_parser });
     _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo });
@@ -131,7 +132,7 @@ pub fn build(b: *std.build.Builder) !void {
     _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train });
     _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train });
 
-    const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, grammar_parser, clip });
+    const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, grammar_parser, clip, llava });
     if (server.target.isWindows()) {
         server.linkSystemLibrary("ws2_32");
     }
