Commit 97c31bb

Merge branch 'master' into rwkv-v7

2 parents: e6ee7e9 + 19d3c82


78 files changed: +8907 −3727 lines

.github/workflows/build.yml

Lines changed: 2 additions & 2 deletions

```diff
@@ -10,10 +10,10 @@ on:
   push:
     branches:
       - master
-    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
+    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
   pull_request:
     types: [opened, synchronize, reopened]
-    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
+    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']

 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
```

.github/workflows/server.yml

Lines changed: 24 additions & 1 deletion

```diff
@@ -81,13 +81,36 @@ jobs:
         with:
           node-version: '22.11.0'

+      - name: WebUI - Install dependencies
+        id: webui_lint
+        run: |
+          cd examples/server/webui
+          npm ci
+
+      - name: WebUI - Check code format
+        id: webui_format
+        run: |
+          git config --global --add safe.directory $(realpath .)
+          cd examples/server/webui
+          git status
+
+          npm run format
+          git status
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            echo "Files do not follow coding style. To fix: npm run format"
+            echo "${modified_files}"
+            exit 1
+          fi
+
       - name: Verify bundled index.html
         id: verify_server_index_html
         run: |
           git config --global --add safe.directory $(realpath .)
           cd examples/server/webui
           git status
-          npm ci
+
           npm run build
           git status
           modified_files="$(git status -s)"
```
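The new format gate can be reproduced locally before pushing; a minimal sketch, assuming it is run from the repository root (paths and commands taken from the workflow above):

```bash
# Hedged local equivalent of the CI "WebUI - Check code format" step.
cd examples/server/webui
npm ci
npm run format
# CI fails when the formatter modified any file; mirror that check here.
if [ -n "$(git status -s)" ]; then
    echo "Files do not follow coding style. To fix: npm run format"
    exit 1
fi
```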

CMakeLists.txt

Lines changed: 1 addition & 1 deletion

```diff
@@ -233,4 +233,4 @@ configure_file(cmake/llama.pc.in
         @ONLY)

 install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
-        DESTINATION lib/pkgconfig)
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
```

README.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -189,6 +189,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [ramalama](https://github.com/containers/ramalama) (MIT)
 - [semperai/amica](https://github.com/semperai/amica) (MIT)
 - [withcatai/catai](https://github.com/withcatai/catai) (MIT)
+- [Autopen](https://github.com/blackhole89/autopen) (GPL)

 </details>
```

cmake/llama.pc.in

Lines changed: 5 additions & 5 deletions

```diff
@@ -1,10 +1,10 @@
 prefix=@CMAKE_INSTALL_PREFIX@
-exec_prefix=${prefix}
-libdir=${exec_prefix}/lib
-includedir=${prefix}/include
+exec_prefix=@CMAKE_INSTALL_PREFIX@
+libdir=@CMAKE_INSTALL_FULL_LIBDIR@
+includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@

 Name: llama
 Description: Port of Facebook's LLaMA model in C/C++
-Version: @PROJECT_VERSION@
-Libs: -L${libdir} -lggml -lggml-base -lllama
+Version: @LLAMA_INSTALL_VERSION@
+Libs: -L${libdir} -lggml -lggml-base -lllama
 Cflags: -I${includedir}
```
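Together with the CMakeLists.txt change above, the generated llama.pc now follows the GNUInstallDirs-style install layout (`CMAKE_INSTALL_LIBDIR`, `CMAKE_INSTALL_FULL_LIBDIR`). A hedged consumer sketch, assuming llama.cpp was installed via `cmake --install` and `main.c` is a hypothetical source file:

```bash
# Hedged sketch: build a program against an installed llama.cpp using the
# Cflags/Libs emitted by llama.pc (main.c is a hypothetical example file).
pkg-config --modversion llama   # sanity-check that the .pc file is found
cc main.c $(pkg-config --cflags --libs llama) -o main
```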

common/arg.cpp

Lines changed: 42 additions & 0 deletions

```diff
@@ -2324,5 +2324,47 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_TTS}));

+    add_opt(common_arg(
+        {"--embd-bge-small-en-default"},
+        string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/bge-small-en-v1.5-Q8_0-GGUF";
+            params.hf_file = "bge-small-en-v1.5-q8_0.gguf";
+            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+            params.embd_normalize = 2;
+            params.n_ctx = 512;
+            params.verbose_prompt = true;
+            params.embedding = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
+
+    add_opt(common_arg(
+        {"--embd-e5-small-en-default"},
+        string_format("use default e5-small-v2 model (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/e5-small-v2-Q8_0-GGUF";
+            params.hf_file = "e5-small-v2-q8_0.gguf";
+            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+            params.embd_normalize = 2;
+            params.n_ctx = 512;
+            params.verbose_prompt = true;
+            params.embedding = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
+
+    add_opt(common_arg(
+        {"--embd-gte-small-default"},
+        string_format("use default gte-small model (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/gte-small-Q8_0-GGUF";
+            params.hf_file = "gte-small-q8_0.gguf";
+            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+            params.embd_normalize = 2;
+            params.n_ctx = 512;
+            params.verbose_prompt = true;
+            params.embedding = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
+
     return ctx_arg;
 }
```
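Each preset pins a small ggml-org embedding model on Hugging Face and flips the embedding-friendly defaults (pooling, normalization, `n_ctx = 512`). A hedged usage sketch; the binary paths follow the repo's build layout, and the prompt and port values are illustrative only:

```bash
# One-shot embedding with the bge preset (downloads the GGUF on first use).
./build/bin/llama-embedding --embd-bge-small-en-default -p "hello world"

# Or serve the e5 preset over HTTP (port is illustrative).
./build/bin/llama-server --embd-e5-small-en-default --port 8080
```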

common/llguidance.cpp

Lines changed: 3 additions & 3 deletions

```diff
@@ -254,10 +254,10 @@ llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab, const char * g
         };
     }

-    return new llama_sampler{
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_llg_i,
-        /* .ctx = */ ctx,
-    };
+        /* .ctx = */ ctx
+    );
 }

 #else
```

common/log.h

Lines changed: 1 addition & 0 deletions

```diff
@@ -2,6 +2,7 @@

 #include "ggml.h" // for ggml_log_level

+#define LOG_CLR_TO_EOL  "\033[K\r"
 #define LOG_COL_DEFAULT "\033[0m"
 #define LOG_COL_BOLD    "\033[1m"
 #define LOG_COL_RED     "\033[31m"
```
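In the new macro, `\033[K` is the ANSI "erase to end of line" sequence and `\r` rewinds the cursor to column 0, so a status line can be redrawn in place. A hedged shell demo of the escape sequence itself (the macro is consumed by the C++ logger):

```bash
# Hedged demo of what "\033[K\r" does: each iteration erases the rest of
# the line and rewinds the cursor, so the progress text redraws in place.
for p in 0 25 50 75 100; do
    printf 'progress: %3d%%\033[K\r' "$p"
    sleep 0.2
done
printf 'done\033[K\n'
```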

docs/build.md

Lines changed: 53 additions & 8 deletions

````diff
@@ -125,21 +125,66 @@ For detailed info, please refer to [llama.cpp for SYCL](./backend/SYCL.md).

 ## CUDA

-This provides GPU acceleration using an NVIDIA GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from the [NVIDIA developer site](https://developer.nvidia.com/cuda-downloads).
+This provides GPU acceleration using an NVIDIA GPU. Make sure to have the [CUDA toolkit](https://developer.nvidia.com/cuda-toolkit) installed.

-If you are using Fedora (using Fedora Workstation, or an 'Atomic' variant such as Silverblue), or would like to set up CUDA in a toolbox, please consider our [Fedora CUDA guide](./cuda-fedora.md). Unfortunately, the process is not as simple as one might expect.
+#### Download directly from NVIDIA
+You may find the official downloads here: [NVIDIA developer site](https://developer.nvidia.com/cuda-downloads).

-- Using `CMake`:

-  ```bash
-  cmake -B build -DGGML_CUDA=ON
-  cmake --build build --config Release
-  ```
+#### Compile and run inside a Fedora Toolbox Container
+We also have a [guide](./cuda-fedora.md) for setting up CUDA toolkit in a Fedora [toolbox container](https://containertoolbx.org/).
+
+**Recommended for:**
+
+- ***Particularly*** *convenient* for users of [Atomic Desktops for Fedora](https://fedoraproject.org/atomic-desktops/); such as: [Silverblue](https://fedoraproject.org/atomic-desktops/silverblue/) and [Kinoite](https://fedoraproject.org/atomic-desktops/kinoite/).
+- Toolbox is installed by default: [Fedora Workstation](https://fedoraproject.org/workstation/) or [Fedora KDE Plasma Desktop](https://fedoraproject.org/spins/kde).
+- *Optionally* toolbox packages are available: [Arch Linux](https://archlinux.org/), [Red Hat Enterprise Linux >= 8.5](https://www.redhat.com/en/technologies/linux-platforms/enterprise-linux), or [Ubuntu](https://ubuntu.com/download)
+
+### Compilation
+```bash
+cmake -B build -DGGML_CUDA=ON
+cmake --build build --config Release
+```
+
+### Override Compute Capability Specifications
+
+If `nvcc` cannot detect your gpu, you may get compile-warnings such as:
+```text
+nvcc warning : Cannot find valid GPU for '-arch=native', default arch is used
+```

-The environment variable [`CUDA_VISIBLE_DEVICES`](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) can be used to specify which GPU(s) will be used.
+To override the `native` GPU detection:
+
+#### 1. Take note of the `Compute Capability` of your NVIDIA devices: ["CUDA: Your GPU Compute > Capability"](https://developer.nvidia.com/cuda-gpus).
+
+```text
+GeForce RTX 4090     8.9
+GeForce RTX 3080 Ti  8.6
+GeForce RTX 3070     8.6
+```
+
+#### 2. Manually list each varying `Compute Capability` in the `CMAKE_CUDA_ARCHITECTURES` list.
+
+```bash
+cmake -B build -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES="86;89"
+```
+
+### Runtime CUDA environmental variables
+
+You may set the [cuda environmental variables](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) at runtime.
+
+```bash
+# Use `CUDA_VISIBLE_DEVICES` to hide the first compute device.
+CUDA_VISIBLE_DEVICES="-0" ./build/bin/llama-server --model /srv/models/llama.gguf
+```
+
+### Unified Memory

 The environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1` can be used to enable unified memory in Linux. This allows swapping to system RAM instead of crashing when the GPU VRAM is exhausted. In Windows this setting is available in the NVIDIA control panel as `System Memory Fallback`.

+### Performance Tuning
+
 The following compilation options are also available to tweak performance:

 | Option | Legal values | Default | Description |
````
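The unified-memory toggle documented above is a plain runtime environment variable; a hedged one-liner (binary and model path are illustrative):

```bash
# Hedged example: let CUDA spill to system RAM instead of aborting when
# VRAM runs out (Linux; env var name taken from the docs above).
GGML_CUDA_ENABLE_UNIFIED_MEMORY=1 ./build/bin/llama-cli -m /srv/models/llama.gguf -p "Hello"
```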
