
Commit ac0f33c

Merge branch 'master' into compilade/fix-mpt-pretok

2 parents: db2ffd5 + f1948f1

File tree

172 files changed (+16178, -3285 lines)


.devops/nix/package.nix

Lines changed: 7 additions & 10 deletions

@@ -17,19 +17,18 @@
   rocmPackages,
   vulkan-headers,
   vulkan-loader,
-  clblast,
+  curl,
   useBlas ? builtins.all (x: !x) [
     useCuda
     useMetalKit
-    useOpenCL
     useRocm
     useVulkan
   ] && blas.meta.available,
   useCuda ? config.cudaSupport,
-  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
+  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
   useMpi ? false, # Increases the runtime closure size by ~700M
-  useOpenCL ? false,
   useRocm ? config.rocmSupport,
+  enableCurl ? true,
   useVulkan ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

@@ -56,7 +55,6 @@ let
   ++ lib.optionals useCuda [ "CUDA" ]
   ++ lib.optionals useMetalKit [ "MetalKit" ]
   ++ lib.optionals useMpi [ "MPI" ]
-  ++ lib.optionals useOpenCL [ "OpenCL" ]
   ++ lib.optionals useRocm [ "ROCm" ]
   ++ lib.optionals useVulkan [ "Vulkan" ];

@@ -198,19 +196,19 @@ effectiveStdenv.mkDerivation (
   optionals effectiveStdenv.isDarwin darwinBuildInputs
   ++ optionals useCuda cudaBuildInputs
   ++ optionals useMpi [ mpi ]
-  ++ optionals useOpenCL [ clblast ]
   ++ optionals useRocm rocmBuildInputs
   ++ optionals useBlas [ blas ]
-  ++ optionals useVulkan vulkanBuildInputs;
+  ++ optionals useVulkan vulkanBuildInputs
+  ++ optionals enableCurl [ curl ];

   cmakeFlags =
     [
       (cmakeBool "LLAMA_BUILD_SERVER" true)
       (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
       (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+      (cmakeBool "LLAMA_CURL" enableCurl)
       (cmakeBool "GGML_NATIVE" false)
       (cmakeBool "GGML_BLAS" useBlas)
-      (cmakeBool "GGML_CLBLAST" useOpenCL)
       (cmakeBool "GGML_CUDA" useCuda)
       (cmakeBool "GGML_HIPBLAS" useRocm)
       (cmakeBool "GGML_METAL" useMetalKit)

@@ -254,7 +252,6 @@ effectiveStdenv.mkDerivation (
     useCuda
     useMetalKit
     useMpi
-    useOpenCL
     useRocm
     useVulkan
     ;

@@ -281,7 +278,7 @@ effectiveStdenv.mkDerivation (
   # Configurations we don't want even the CI to evaluate. Results in the
   # "unsupported platform" messages. This is mostly a no-op, because
   # cudaPackages would've refused to evaluate anyway.
-  badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
+  badPlatforms = optionals useCuda lib.platforms.darwin;

   # Configurations that are known to result in build failures. Can be
   # overridden by importing Nixpkgs with `allowBroken = true`.
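The package.nix change replaces the removed OpenCL/CLBlast path with an `enableCurl` flag (default `true`) that adds `curl` to the build inputs and drives the `LLAMA_CURL` CMake option. A minimal sketch of how a downstream Nix expression might toggle the new flag via `override`; the `llama-cpp` attribute name is an assumption about how the flake exposes this derivation:

    # Hypothetical downstream override of the derivation in .devops/nix/package.nix,
    # assumed to be exposed as `llama-cpp` (e.g. via callPackage in the flake).
    llama-cpp.override {
      enableCurl = false; # drops curl from buildInputs and sets (cmakeBool "LLAMA_CURL" false)
      useVulkan  = true;  # pulls in vulkanBuildInputs in place of the removed OpenCL path
    }

Note that `useOpenCL` no longer exists as an argument, so any downstream override still passing it will fail to evaluate.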

.github/ISSUE_TEMPLATE/config.yml

Lines changed: 0 additions & 2 deletions

@@ -9,5 +9,3 @@ contact_links:
   - name: Want to contribute?
     url: https://github.com/ggerganov/llama.cpp/wiki/contribute
     about: Head to the contribution guide page of the wiki for areas you can help with
-
-

.gitignore

Lines changed: 7 additions & 5 deletions

@@ -47,6 +47,7 @@ build*
 !build-info.cpp.in
 !build-info.sh
 !build.zig
+!docs/build.md
 /libllama.so
 /llama-*
 android-ndk-*

@@ -98,13 +99,14 @@ examples/server/*.mjs.hpp

 # Python

-__pycache__
-.venv
-/Pipfile
-dist
-poetry.lock
+/.venv
+__pycache__/
+*/poetry.lock
 poetry.toml

+# Nix
+/result
+
 # Test binaries
 /tests/test-backend-ops
 /tests/test-double-float

CMakeLists.txt

Lines changed: 5 additions & 1 deletion

@@ -42,6 +42,10 @@ endif()

 option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})

+if (WIN32)
+    add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
+endif()
+
 #
 # option list
 #

@@ -152,7 +156,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
         DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama)

 install(
-    FILES convert-hf-to-gguf.py
+    FILES convert_hf_to_gguf.py
     PERMISSIONS
         OWNER_READ
         OWNER_WRITE

CMakePresets.json

Lines changed: 1 addition & 0 deletions

@@ -19,6 +19,7 @@
       "cacheVariables": {
         "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
         "CMAKE_CXX_COMPILER": "icx",
+        "CMAKE_C_COMPILER": "cl",
         "GGML_SYCL": "ON",
         "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
       }

CONTRIBUTING.md

Lines changed: 20 additions & 10 deletions

@@ -1,14 +1,24 @@
-# Contributing Guidelines
+# Pull requests

-## Checklist
+- Always squash-merge the PR before merging
+- Use the following format for your final commit: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
+- Test your changes:
+  - Using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
+  - Execute [the full CI locally on your machine](ci/README.md) before publishing
+- If the pull request contains only documentation changes (e.g., updating READMEs, adding new wiki pages), please add `[no ci]` to the commit title. This will skip unnecessary CI checks and help reduce build times
+- Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
+  - The PR template has a series of review complexity checkboxes `[ ]` that [you can mark as](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) `[X]` for your conveience

-* Make sure your PR follows the [coding guidelines](https://github.com/ggerganov/llama.cpp/blob/master/README.md#coding-guidelines)
-* Test your changes using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
-* Execute [the full CI locally on your machine](ci/README.md) before publishing
+# Coding guidelines

-## PR formatting
+- Avoid adding third-party dependencies, extra files, extra headers, etc.
+- Always consider cross-compatibility with other operating systems and architectures
+- Avoid fancy looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
+- There are no strict rules for the code style, but try to follow the patterns in the code (indentation, spaces, etc.). Vertical alignment makes things more readable and easier to batch edit
+- Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
+- Naming usually optimizes for common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963)
+- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
+- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
+
+![matmul](media/matmul.png)

-* Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
-  - The PR template has a series of review complexity checkboxes `[ ]` that you can mark as `[X]` for your conveience. Refer to [About task lists](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) for more information.
-* If the pull request only contains documentation changes (e.g., updating READMEs, adding new wiki pages), please add `[no ci]` to the commit title. This will skip unnecessary CI checks and help reduce build times.
-* When squashing multiple commits on merge, use the following format for your commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : Fix typo in utils.py (#1234)`
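To make the matrix-multiplication convention above concrete, here is a small worked instance of the shape rule, assuming only the conventions quoted in the diff (row-major storage, dimension 0 = columns; the specific sizes are illustrative):

$A \in \mathbb{R}^{4 \times 2}$ ($ne = [2, 4]$) and $B \in \mathbb{R}^{3 \times 2}$ ($ne = [2, 3]$) give $C = B A^T \in \mathbb{R}^{3 \times 4}$ ($ne = [4, 3]$).

Both operands must agree on dimension 0 (the shared inner length $k = 2$), and the result takes its columns from $A$'s rows and its rows from $B$'s rows, so both inputs are consumed in their natural row-major layout. Checking the transposed form: $C^T = A B^T$ is $(4 \times 2)(2 \times 3) = 4 \times 3$, as expected.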

Makefile

Lines changed: 24 additions & 0 deletions

@@ -14,6 +14,7 @@ BUILD_TARGETS = \
 	llama-finetune \
 	llama-gbnf-validator \
 	llama-gguf \
+	llama-gguf-hash \
 	llama-gguf-split \
 	llama-gritlm \
 	llama-imatrix \

@@ -62,6 +63,11 @@ TEST_TARGETS = \
 	tests/test-tokenizer-1-bpe \
 	tests/test-tokenizer-1-spm

+# Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned
+LEGACY_TARGETS = main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
+	simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \
+	retrieval speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm
+
 # Deprecation aliases
 ifdef LLAMA_CUBLAS
 $(error LLAMA_CUBLAS is removed. Use GGML_CUDA instead.)

@@ -1086,6 +1092,7 @@ clean:
 	rm -vrf ggml/src/ggml-cuda/template-instances/*.o
 	rm -rvf $(BUILD_TARGETS)
 	rm -rvf $(TEST_TARGETS)
+	rm -rvf $(LEGACY_TARGETS)
 	find examples pocs -type f -name "*.o" -delete

 #

@@ -1172,6 +1179,23 @@ llama-gguf: examples/gguf/gguf.cpp \
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

+examples/gguf-hash/deps/sha1/sha1.o: \
+	examples/gguf-hash/deps/sha1/sha1.c
+	$(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
+
+examples/gguf-hash/deps/xxhash/xxhash.o: \
+	examples/gguf-hash/deps/xxhash/xxhash.c
+	$(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
+
+examples/gguf-hash/deps/sha256/sha256.o: \
+	examples/gguf-hash/deps/sha256/sha256.c
+	$(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
+
+llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp examples/gguf-hash/deps/sha1/sha1.o examples/gguf-hash/deps/xxhash/xxhash.o examples/gguf-hash/deps/sha256/sha256.o \
+	$(OBJ_ALL)
+	$(CXX) $(CXXFLAGS) -Iexamples/gguf-hash/deps -c $< -o $(call GET_OBJ_FILE, $<)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
 llama-gguf-split: examples/gguf-split/gguf-split.cpp \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
