Skip to content

Commit 22504ec

Browse files
committed
Merge branch 'master' into compilade/batch-splits
2 parents c51daef + 5e116e8 commit 22504ec

File tree

182 files changed

+18471
-149544
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

182 files changed

+18471
-149544
lines changed

.devops/nix/package.nix

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
vulkan-headers,
1919
vulkan-loader,
2020
curl,
21+
shaderc,
2122
useBlas ? builtins.all (x: !x) [
2223
useCuda
2324
useMetalKit
@@ -89,6 +90,22 @@ let
8990
ps.tiktoken
9091
ps.torchWithoutCuda
9192
ps.transformers
93+
94+
# server bench
95+
ps.matplotlib
96+
97+
# server tests
98+
ps.openai
99+
ps.behave
100+
ps.prometheus-client
101+
102+
# for examples/pydantic-models-to-grammar-examples.py
103+
ps.docstring-parser
104+
ps.pydantic
105+
106+
# for scripts/compare-llama-bench.py
107+
ps.gitpython
108+
ps.tabulate
92109
]
93110
);
94111

@@ -130,6 +147,7 @@ let
130147
vulkanBuildInputs = [
131148
vulkan-headers
132149
vulkan-loader
150+
shaderc
133151
];
134152
in
135153

.devops/tools.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ arg1="$1"
88
shift
99

1010
if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
11-
python3 ./convert-hf-to-gguf.py "$@"
11+
python3 ./convert_hf_to_gguf.py "$@"
1212
elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
1313
./llama-quantize "$@"
1414
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then

.github/labeler.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ SYCL:
1616
- any-glob-to-any-file:
1717
- ggml/include/ggml-sycl.h
1818
- ggml/src/ggml-sycl.cpp
19-
- README-sycl.md
19+
- ggml/src/ggml-sycl/**
20+
- docs/backend/SYCL.md
21+
- examples/sycl/**
2022
Nvidia GPU:
2123
- changed-files:
2224
- any-glob-to-any-file:

.github/workflows/build.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,8 +355,10 @@ jobs:
355355
- name: Dependencies
356356
id: depends
357357
run: |
358-
sudo apt-get update
359-
sudo apt-get install build-essential libvulkan-dev
358+
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
359+
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
360+
sudo apt-get update -y
361+
sudo apt-get install -y build-essential vulkan-sdk
360362
361363
- name: Build
362364
id: cmake_build
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
name: Python Type-Check
2+
3+
on:
4+
push:
5+
paths:
6+
- '.github/workflows/python-type-check.yml'
7+
- '**.py'
8+
- '**/requirements*.txt'
9+
pull_request:
10+
paths:
11+
- '.github/workflows/python-type-check.yml'
12+
- '**.py'
13+
- '**/requirements*.txt'
14+
15+
concurrency:
16+
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
17+
cancel-in-progress: true
18+
19+
jobs:
20+
python-type-check:
21+
runs-on: ubuntu-latest
22+
name: pyright type-check
23+
steps:
24+
- name: Check out source repository
25+
uses: actions/checkout@v4
26+
- name: Set up Python environment
27+
uses: actions/setup-python@v5
28+
with:
29+
python-version: "3.11"
30+
- name: Install Python dependencies
31+
# TODO: use a venv
32+
run: pip install -r requirements/requirements-all.txt
33+
- name: Type-check with Pyright
34+
uses: jakebailey/pyright-action@v2
35+
with:
36+
version: 1.1.370
37+
level: warning
38+
warnings: true

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ build*
4747
!build-info.cpp.in
4848
!build-info.sh
4949
!build.zig
50+
!docs/build.md
5051
/libllama.so
5152
/llama-*
5253
android-ndk-*
@@ -60,6 +61,11 @@ llama-batched-swift
6061
out/
6162
tmp/
6263

64+
# Deprecated
65+
66+
/main
67+
/server
68+
6369
# CI
6470

6571
!.github/workflows/*.yml

CMakeLists.txt

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,6 @@ endif()
5050
# option list
5151
#
5252

53-
# general
54-
option(LLAMA_CCACHE "llama: use ccache if available" ON)
55-
5653
# debug
5754
option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
5855
option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
@@ -77,7 +74,6 @@ option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
7774
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
7875

7976
# override ggml options
80-
set(GGML_CCACHE ${LLAMA_CCACHE})
8177
set(GGML_SANITIZE_THREAD ${LLAMA_SANITIZE_THREAD})
8278
set(GGML_SANITIZE_ADDRESS ${LLAMA_SANITIZE_ADDRESS})
8379
set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED})
@@ -115,7 +111,10 @@ llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
115111
# build the library
116112
#
117113

118-
add_subdirectory(ggml)
114+
if (NOT TARGET ggml)
115+
add_subdirectory(ggml)
116+
# ... otherwise assume ggml is added by a parent CMakeLists.txt
117+
endif()
119118
add_subdirectory(src)
120119

121120
#
@@ -133,7 +132,16 @@ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location o
133132
set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
134133
set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
135134

136-
get_directory_property(LLAMA_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
135+
136+
# At the moment some compile definitions are placed within the ggml/src
137+
# directory but not exported on the `ggml` target. This could be improved by
138+
# determining _precisely_ which defines are necessary for the llama-config
139+
# package.
140+
#
141+
get_directory_property(GGML_DIR_DEFINES DIRECTORY ggml/src COMPILE_DEFINITIONS)
142+
get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS)
143+
set(GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES} ${GGML_DIR_DEFINES})
144+
get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)
137145

138146
set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h)
139147
install(TARGETS llama LIBRARY PUBLIC_HEADER)

CONTRIBUTING.md

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,24 @@
1-
# Contributing Guidelines
1+
# Pull requests
22

3-
## Checklist
3+
- Always squash-merge the PR
4+
- Use the following format for your final commit: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
5+
- Test your changes:
6+
- Using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
7+
- Execute [the full CI locally on your machine](ci/README.md) before publishing
8+
- If the pull request contains only documentation changes (e.g., updating READMEs, adding new wiki pages), please add `[no ci]` to the commit title. This will skip unnecessary CI checks and help reduce build times
9+
- Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
10+
- The PR template has a series of review complexity checkboxes `[ ]` that [you can mark as](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) `[X]` for your convenience
411

5-
* Make sure your PR follows the [coding guidelines](https://github.com/ggerganov/llama.cpp/blob/master/README.md#coding-guidelines)
6-
* Test your changes using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
7-
* Execute [the full CI locally on your machine](ci/README.md) before publishing
12+
# Coding guidelines
813

9-
## PR formatting
14+
- Avoid adding third-party dependencies, extra files, extra headers, etc.
15+
- Always consider cross-compatibility with other operating systems and architectures
16+
- Avoid fancy looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
17+
- There are no strict rules for the code style, but try to follow the patterns in the code (indentation, spaces, etc.). Vertical alignment makes things more readable and easier to batch edit
18+
- Clean up any trailing whitespace, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
19+
- Naming usually optimizes for common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963)
20+
- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
21+
- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
22+
23+
![matmul](media/matmul.png)
1024

11-
* Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
12-
- The PR template has a series of review complexity checkboxes `[ ]` that you can mark as `[X]` for your conveience. Refer to [About task lists](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) for more information.
13-
* If the pull request only contains documentation changes (e.g., updating READMEs, adding new wiki pages), please add `[no ci]` to the commit title. This will skip unnecessary CI checks and help reduce build times.
14-
* When squashing multiple commits on merge, use the following format for your commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : Fix typo in utils.py (#1234)`

0 commit comments

Comments
 (0)