
Commit 9b00a7e

Merge remote-tracking branch 'origin/master' into sl/remove-task-type
2 parents 486d061 + 6a2f298


47 files changed: +22917 −21015 lines

.editorconfig

Lines changed: 1 addition & 0 deletions
@@ -28,4 +28,5 @@ indent_size = 2
 indent_style = tab
 
 [examples/cvector-generator/*.txt]
+trim_trailing_whitespace = unset
 insert_final_newline = unset

.github/workflows/server.yml

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ jobs:
 
     strategy:
       matrix:
-        sanitizer: [ADDRESS, THREAD, UNDEFINED]
+        sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
         build_type: [RelWithDebInfo]
         include:
           - build_type: Release

.gitignore

Lines changed: 71 additions & 38 deletions
@@ -1,90 +1,123 @@
-*.o
+# Extensions
+
 *.a
-*.so
-*.gguf
-*.gguf.json
+*.bat
 *.bin
-*.exe
 *.dll
-*.log
-*.gcov
-*.gcno
-*.gcda
 *.dot
-*.bat
-*.tmp
-*.metallib
 *.etag
+*.exe
+*.gcda
+*.gcno
+*.gcov
+*.gguf
+*.gguf.json
 *.lastModified
-.DS_Store
-.build/
+*.log
+*.metallib
+*.o
+*.so
+*.tmp
+
+# IDE / OS
+
 .cache/
 .ccls-cache/
 .direnv/
+.DS_Store
 .envrc
+.idea/
 .swiftpm
-.venv
-.clang-tidy
 .vs/
 .vscode/
-.idea/
+nppBackup
 
-ggml-metal-embed.metal
 
-lcov-report/
+# Coverage
+
 gcovr-report/
+lcov-report/
+
+# Build Artifacts
 
 tags
+.build/
 build*
+!build-info.cmake
+!build-info.cpp.in
+!build-info.sh
 !build.zig
-cmake-build-*
+/libllama.so
+/llama-*
 android-ndk-*
+arm_neon.h
+cmake-build-*
+CMakeSettings.json
+compile_commands.json
+ggml-metal-embed.metal
+llama-batched-swift
 out/
 tmp/
 
+# CI
+
+!.github/workflows/*.yml
+
+# Models
+
 models/*
 models-mnt
+!models/.editorconfig
+!models/ggml-vocab-*.gguf*
 
-/Pipfile
-/libllama.so
-/llama-*
-llama-batched-swift
-/common/build-info.cpp
-arm_neon.h
-compile_commands.json
-CMakeSettings.json
-
-__pycache__
-dist
+# Zig
 
 zig-out/
 zig-cache/
 
+# Logs
+
 ppl-*.txt
 qnt-*.txt
 perf-*.txt
 
+# Examples
+
 examples/jeopardy/results.txt
+examples/server/*.css.hpp
 examples/server/*.html.hpp
 examples/server/*.js.hpp
 examples/server/*.mjs.hpp
-examples/server/*.css.hpp
+!build_64.sh
+!examples/*.bat
+!examples/*/*.kts
+!examples/*/*/*.kts
+!examples/sycl/*.bat
+!examples/sycl/*.sh
 
+# Python
+
+__pycache__
+.venv
+/Pipfile
+dist
 poetry.lock
 poetry.toml
-nppBackup
 
 # Test binaries
-/tests/test-grammar-parser
-/tests/test-llama-grammar
+/tests/test-backend-ops
 /tests/test-double-float
 /tests/test-grad0
+/tests/test-grammar-parser
+/tests/test-llama-grammar
 /tests/test-opt
 /tests/test-quantize-fns
 /tests/test-quantize-perf
+/tests/test-rope
 /tests/test-sampling
 /tests/test-tokenizer-0
-/tests/test-tokenizer-1-spm
 /tests/test-tokenizer-1-bpe
-/tests/test-rope
-/tests/test-backend-ops
+/tests/test-tokenizer-1-spm
+
+# Scripts
+!/scripts/install-oneapi.bat

CMakeLists.txt

Lines changed: 4 additions & 3 deletions
@@ -665,6 +665,7 @@ if (LLAMA_SYCL)
     #todo: AOT
 
     find_package(IntelSYCL REQUIRED)
+    find_package(MKL REQUIRED)
 
     message(STATUS "SYCL found")
 
@@ -679,11 +680,9 @@ if (LLAMA_SYCL)
     endif()
 
     add_compile_options(-I./) #include DPCT
-    add_compile_options(-I/${SYCL_INCLUDE_DIR})
 
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
     if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
     endif()
@@ -693,8 +692,10 @@ if (LLAMA_SYCL)
     list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")
 
     if (WIN32)
-        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
+        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
     else()
+        add_compile_options(-I/${SYCL_INCLUDE_DIR})
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
        if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
        elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
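With MKL resolved through `find_package(MKL REQUIRED)`, the Windows branch now links the imported targets `IntelSYCL::SYCL_CXX`, `MKL::MKL` and `MKL::MKL_SYCL` instead of listing individual `mkl_*_dll.lib` libraries. Imported targets carry their own usage requirements (include directories, compile and link flags), which is presumably why the hand-written `-I/${SYCL_INCLUDE_DIR}` and `-fsycl -L${MKLROOT}/lib` options could be moved into the non-Windows branch only.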

CMakePresets.json

Lines changed: 23 additions & 8 deletions
@@ -11,9 +11,21 @@
                 "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
             }
         },
-
+        {
+            "name": "sycl-base",
+            "hidden": true,
+            "generator": "Ninja",
+            "binaryDir": "${sourceDir}/build-${presetName}",
+            "cacheVariables": {
+                "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+                "CMAKE_CXX_COMPILER": "icx",
+                "LLAMA_SYCL": "ON",
+                "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+            }
+        },
         { "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
-        { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
+        { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
+        { "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
         { "name": "static", "hidden": true, "cacheVariables": { "LLAMA_STATIC": "ON" } },
 
         {
@@ -35,15 +47,18 @@
         },
 
         { "name": "arm64-windows-llvm-debug" , "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
-        { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "release" ] },
-        { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "release", "static" ] },
+        { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
+        { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
 
         { "name": "arm64-windows-msvc-debug" , "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
-        { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "release" ] },
-        { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "release", "static" ] },
+        { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
+        { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },
 
         { "name": "x64-windows-msvc-debug" , "inherits": [ "base", "debug" ] },
-        { "name": "x64-windows-msvc-release", "inherits": [ "base", "release" ] },
-        { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "release", "static" ] }
+        { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
+        { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
+
+        { "name": "x64-windows-sycl-debug" , "inherits": [ "sycl-base", "debug" ] },
+        { "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] }
     ]
 }
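Two details are worth noting here. First, the hidden `release` preset now produces a true `Release` build, while the new hidden `reldbg` preset keeps the old `RelWithDebInfo` behavior; every pre-existing `*-release` preset switches to inheriting `reldbg`, so its output is unchanged. Second, the new `x64-windows-sycl-*` presets build on `sycl-base` (Ninja, `icx`, `LLAMA_SYCL=ON`); the README-sycl.md hunk below exercises them, e.g. `cmake --preset x64-windows-sycl-release` followed by `cmake --build build-x64-windows-sycl-release -j --target llama-cli`.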

Makefile

Lines changed: 1 addition & 1 deletion
@@ -1051,7 +1051,7 @@ tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
-tests/test-grammar-integration: tests/test-grammar-integration.cpp ggml.o llama.o grammar-parser.o $(OBJS)
+tests/test-grammar-integration: tests/test-grammar-integration.cpp json-schema-to-grammar.o ggml.o llama.o grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
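Because the link recipe passes `$(filter-out %.h $<,$^)` — every prerequisite except headers and the primary source — adding `json-schema-to-grammar.o` to the prerequisite list is all that is needed to link it into the test binary, presumably to resolve symbols that `test-grammar-integration.cpp` now pulls in from the JSON-schema-to-grammar converter.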

README-sycl.md

Lines changed: 19 additions & 11 deletions
@@ -410,15 +410,9 @@ Output (example):
 
 4. Install build tools
 
-a. Download & install cmake for Windows: https://cmake.org/download/
+a. Download & install cmake for Windows: https://cmake.org/download/ (CMake can also be installed from Visual Studio Installer)
+b. The new Visual Studio will install Ninja as default. (If not, please install it manually: https://ninja-build.org/)
 
-b. Download & install mingw-w64 make for Windows provided by w64devkit
-
-- Download the 1.19.0 version of [w64devkit](https://github.com/skeeto/w64devkit/releases/download/v1.19.0/w64devkit-1.19.0.zip).
-
-- Extract `w64devkit` on your pc.
-
-- Add the **bin** folder path in the Windows system PATH environment (for e.g. `C:\xxx\w64devkit\bin\`).
 
 ### II. Build llama.cpp
 
@@ -428,10 +422,10 @@ On the oneAPI command line window, step into the llama.cpp main directory and ru
 @call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
 
 # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
+cmake -B build -G "Ninja" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
 
 # Option 2: Or FP16
-cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
+cmake -B build -G "Ninja" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
 
 cmake --build build --config Release -j
 ```
@@ -441,9 +435,23 @@ Otherwise, run the `win-build-sycl.bat` wrapper which encapsulates the former in
 .\examples\sycl\win-build-sycl.bat
 ```
 
+Or, use CMake presets to build:
+```sh
+cmake --preset x64-windows-sycl-release
+cmake --build build-x64-windows-sycl-release -j --target llama-cli
+
+cmake -DLLAMA_SYCL_F16=ON --preset x64-windows-sycl-release
+cmake --build build-x64-windows-sycl-release -j --target llama-cli
+
+cmake --preset x64-windows-sycl-debug
+cmake --build build-x64-windows-sycl-debug -j --target llama-cli
+```
+
+Or, you can use Visual Studio to open llama.cpp folder as a CMake project. Choose the sycl CMake presets (`x64-windows-sycl-release` or `x64-windows-sycl-debug`) before you compile the project.
+
 *Notes:*
 
-- By default, calling `make` will build all target binary files. In case of a minimal experimental setup, the user can build the inference executable only through `make llama-cli`.
+- In case of a minimal experimental setup, the user can build the inference executable only through `cmake --build build --config Release -j --target llama-cli`.
 
 ### III. Run the inference
 
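Note the toolchain shift in these hunks: the build moves from the MinGW Makefiles generator (with `icx` for both languages) to Ninja, compiling C sources with MSVC's `cl` and C++ with `icx`, and the w64devkit/mingw-w64 prerequisite is dropped in favor of the Ninja that Visual Studio installs.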

common/common.cpp

Lines changed: 12 additions & 4 deletions
@@ -6,7 +6,6 @@
 #include "llama.h"
 
 #include <algorithm>
-#include <cassert>
 #include <cinttypes>
 #include <cmath>
 #include <codecvt>
@@ -542,6 +541,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         /**/ if (value == "none") { params.pooling_type = LLAMA_POOLING_TYPE_NONE; }
         else if (value == "mean") { params.pooling_type = LLAMA_POOLING_TYPE_MEAN; }
         else if (value == "cls") { params.pooling_type = LLAMA_POOLING_TYPE_CLS; }
+        else if (value == "last") { params.pooling_type = LLAMA_POOLING_TYPE_LAST; }
         else { invalid_param = true; }
         return true;
     }
@@ -1870,6 +1870,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
 
     options.push_back({ "backend" });
     options.push_back({ "*", " --rpc SERVERS", "comma separated list of RPC servers" });
+
     if (llama_supports_mlock()) {
         options.push_back({ "*", " --mlock", "force system to keep model in RAM rather than swapping or compressing" });
     }
@@ -1988,8 +1989,8 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     options.push_back({ "cvector", " --completions-file FNAME",
                         "completions file (default: '%s')", params.cvector_completions_file.c_str() });
     options.push_back({ "cvector", " --completions N", "number of lines of completions file to use (default: %d)", params.n_completions });
-    options.push_back({ "cvector", " --batch-pca N", "batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch });
-    options.push_back({ "cvector", " --iter-pca N", "number of iterations used for PCA (default: %d)", params.n_pca_iterations });
+    options.push_back({ "cvector", " --pca-batch N", "batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch });
+    options.push_back({ "cvector", " --pca-iter N", "number of iterations used for PCA (default: %d)", params.n_pca_iterations });
 
     printf("usage: %s [options]\n", argv[0]);
 
@@ -2657,7 +2658,14 @@ static bool llama_download_file(const std::string & url, const std::string & pat
     }
 
     // Set the output file
-    std::unique_ptr<FILE, decltype(&fclose)> outfile(fopen(path_temporary.c_str(), "wb"), fclose);
+
+    struct FILE_deleter {
+        void operator()(FILE * f) const {
+            fclose(f);
+        }
+    };
+
+    std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "wb"));
     if (!outfile) {
         fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path.c_str());
         return false;
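The final hunk replaces a function-pointer deleter (`decltype(&fclose)`) with a stateless functor type. A minimal sketch of the difference — the file name and `main` scaffolding are illustrative only, not part of the commit:

```cpp
#include <cstdio>
#include <memory>

// A stateless functor deleter: nothing has to be passed to the unique_ptr
// constructor, and on mainstream standard libraries the empty deleter is
// compressed away, so the smart pointer stays the size of a raw FILE *.
// A function-pointer deleter such as decltype(&fclose) must be stored in
// the object instead, and forming a pointer to a standard library function
// is, strictly speaking, not guaranteed portable in recent C++ standards.
struct FILE_deleter {
    void operator()(FILE * f) const { fclose(f); }
};

using file_ptr = std::unique_ptr<FILE, FILE_deleter>;

int main() {
    file_ptr f(fopen("demo.tmp", "wb")); // hypothetical file name
    if (!f) {
        return 1;
    }
    fputs("hello\n", f.get());
    return 0; // fclose runs automatically when f goes out of scope
}
```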

convert-hf-to-gguf-update.py

Lines changed: 2 additions & 2 deletions
@@ -214,15 +214,15 @@ def get_vocab_base_pre(self, tokenizer) -> str:
 """
 
 convert_py_pth = pathlib.Path("convert-hf-to-gguf.py")
-convert_py = convert_py_pth.read_text()
+convert_py = convert_py_pth.read_text(encoding="utf-8")
 convert_py = re.sub(
     r"(# Marker: Start get_vocab_base_pre)(.+?)( +# Marker: End get_vocab_base_pre)",
     lambda m: m.group(1) + src_func + m.group(3),
     convert_py,
     flags=re.DOTALL | re.MULTILINE,
 )
 
-convert_py_pth.write_text(convert_py)
+convert_py_pth.write_text(convert_py, encoding="utf-8")
 
 logger.info("+++ convert-hf-to-gguf.py was updated")
 
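Without an explicit `encoding`, `pathlib.Path.read_text()` and `write_text()` fall back to the locale's preferred encoding (often cp1252 on Windows), so the script's round-trip edit of convert-hf-to-gguf.py could garble non-ASCII characters there; pinning `encoding="utf-8"` makes the rewrite behave identically on every platform.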
