
Commit 117cbb2

feat: Update llama.cpp

1 parent 04959f1

File tree

3 files changed: 44 additions and 34 deletions

CMakeLists.txt
llama_cpp/llama_cpp.py
vendor/llama.cpp

CMakeLists.txt

Lines changed: 37 additions & 32 deletions
```diff
@@ -5,9 +5,28 @@ project(llama_cpp)
 option(LLAMA_BUILD "Build llama.cpp shared library and install alongside python package" ON)
 option(LLAVA_BUILD "Build llava shared library and install alongside python package" ON)
 
+if(SKBUILD_STATE STREQUAL "editable")
+    # Install into the source directory
+    # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
+    set(LLAMA_CPP_PYTHON_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp)
+else()
+    set(LLAMA_CPP_PYTHON_INSTALL_DIR ${SKBUILD_PLATLIB_DIR}/llama_cpp)
+endif()
+
 if (LLAMA_BUILD)
     set(BUILD_SHARED_LIBS "On")
 
+    set(CMAKE_SKIP_BUILD_RPATH FALSE)
+
+    # When building, don't use the install RPATH already
+    # (but later on when installing)
+    set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
+
+    # Add the automatically determined parts of the RPATH
+    # which point to directories outside the build tree to the install RPATH
+    set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+    set(CMAKE_INSTALL_RPATH "${LLAMA_CPP_PYTHON_INSTALL_DIR}")
+
     # Building llama
     if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
         # Need to disable these llama.cpp flags on Apple x86_64,
```
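The new `LLAMA_CPP_PYTHON_INSTALL_DIR` variable picks the destination once: editable installs go straight into the source tree's `llama_cpp/` package, regular builds into the scikit-build platlib directory. Either way the shared library ends up next to the Python sources, which is what the package's import-time loader depends on. A minimal illustrative sketch of that lookup (a hypothetical helper, not the package's actual loader code):

```python
import ctypes
import pathlib
import sys

def load_libllama() -> ctypes.CDLL:
    """Hypothetical loader sketch, not the package's actual code."""
    # The CMake change above installs the built library next to the Python
    # sources in both editable and regular installs, so searching the
    # package directory finds it in either mode.
    pkg_dir = pathlib.Path(__file__).parent
    suffix = {"darwin": ".dylib", "win32": ".dll"}.get(sys.platform, ".so")
    for candidate in sorted(pkg_dir.glob(f"*llama*{suffix}")):
        try:
            return ctypes.CDLL(str(candidate))
        except OSError:
            continue
    raise FileNotFoundError(f"no llama shared library found in {pkg_dir}")
```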
```diff
@@ -24,31 +43,26 @@ if (LLAMA_BUILD)
 
     add_subdirectory(vendor/llama.cpp)
     install(
-        TARGETS llama
-        LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
+        TARGETS llama
+        LIBRARY DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        RUNTIME DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        ARCHIVE DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        FRAMEWORK DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        RESOURCE DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
     )
-    # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
     install(
-        TARGETS llama
-        LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
+        TARGETS ggml
+        LIBRARY DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        RUNTIME DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        ARCHIVE DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        FRAMEWORK DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        RESOURCE DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
     )
     # Workaround for Windows + CUDA https://github.com/abetlen/llama-cpp-python/issues/563
     if (WIN32 AND (LLAMA_CUDA OR LLAMA_CUBLAS))
         install(
             FILES $<TARGET_RUNTIME_DLLS:llama>
-            DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        )
-        install(
-            FILES $<TARGET_RUNTIME_DLLS:llama>
-            DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
+            DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
         )
     endif()
 
```
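With the updated vendored llama.cpp producing `ggml` as its own shared library, a single parameterized `install()` per target replaces the duplicated `SKBUILD_PLATLIB_DIR`/`CMAKE_CURRENT_SOURCE_DIR` pairs, and the `CMAKE_INSTALL_RPATH` set above lets the llama library resolve the ggml library from the same directory. A quick illustrative check that both binaries landed in the installed package (file names and suffixes are platform-dependent assumptions):

```python
import importlib.util
import pathlib

# Illustrative post-install check: with this commit the llama_cpp package
# directory should contain both the llama and ggml shared libraries.
spec = importlib.util.find_spec("llama_cpp")
pkg_dir = pathlib.Path(spec.origin).parent
shared_libs = sorted(
    p.name for p in pkg_dir.iterdir() if p.suffix in (".so", ".dylib", ".dll")
)
print(shared_libs)  # e.g. ['libggml.so', 'libllama.so'] on Linux (names assumed)
```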
```diff
@@ -71,20 +85,11 @@ if (LLAMA_BUILD)
     endif()
     install(
         TARGETS llava_shared
-        LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-    )
-    # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
-    install(
-        TARGETS llava_shared
-        LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
+        LIBRARY DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        RUNTIME DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        ARCHIVE DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        FRAMEWORK DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        RESOURCE DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
     )
     endif()
 endif()
```

llama_cpp/llama_cpp.py

Lines changed: 6 additions & 1 deletion
```diff
@@ -273,6 +273,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 # LLAMA_VOCAB_TYPE_SPM = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
 # LLAMA_VOCAB_TYPE_BPE = 2, // GPT-2 tokenizer based on byte-level BPE
 # LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece
+# LLAMA_VOCAB_TYPE_UGM = 4, // T5 tokenizer based on Unigram
 # };
 LLAMA_VOCAB_TYPE_NONE = 0
 """For models without vocab"""
```
```diff
@@ -282,6 +283,8 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 """GPT-2 tokenizer based on byte-level BPE"""
 LLAMA_VOCAB_TYPE_WPM = 3
 """BERT tokenizer based on WordPiece"""
+LLAMA_VOCAB_TYPE_UGM = 4
+"""T5 tokenizer based on Unigram"""
 
 
 # // pre-tokenization types
```
```diff
@@ -302,6 +305,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 # LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
 # LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
 # LLAMA_VOCAB_PRE_TYPE_PORO = 15,
+# LLAMA_VOCAB_PRE_TYPE_VIKING = 16,
 # };
 LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
```
```diff
@@ -319,6 +323,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 LLAMA_VOCAB_PRE_TYPE_DBRX = 13
 LLAMA_VOCAB_PRE_TYPE_SMAUG = 14
 LLAMA_VOCAB_PRE_TYPE_PORO = 15
+LLAMA_VOCAB_PRE_TYPE_VIKING = 16
 
 
 # // note: these values should be synchronized with ggml_rope
```
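The new `LLAMA_VOCAB_TYPE_UGM` and `LLAMA_VOCAB_PRE_TYPE_VIKING` values behave like the existing constants. A small illustrative sketch (the name map is ours, not part of the bindings):

```python
import llama_cpp

# Illustrative name map for the vocab types, including the new UGM entry;
# the table itself is ours, not part of the bindings.
VOCAB_TYPE_NAMES = {
    llama_cpp.LLAMA_VOCAB_TYPE_NONE: "no vocab",
    llama_cpp.LLAMA_VOCAB_TYPE_SPM: "SentencePiece (byte-level BPE with byte fallback)",
    llama_cpp.LLAMA_VOCAB_TYPE_BPE: "GPT-2 style byte-level BPE",
    llama_cpp.LLAMA_VOCAB_TYPE_WPM: "WordPiece (BERT)",
    llama_cpp.LLAMA_VOCAB_TYPE_UGM: "Unigram (T5)",
}

print(VOCAB_TYPE_NAMES[llama_cpp.LLAMA_VOCAB_TYPE_UGM])  # Unigram (T5)
```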
```diff
@@ -2317,7 +2322,7 @@ def llama_n_threads_batch(ctx: llama_context_p, /) -> int:
     ...
 
 
-# // Set whether the model is in embeddings model or not
+# // Set whether the model is in embeddings mode or not
 # // If true, embeddings will be returned but logits will not
 # LLAMA_API void llama_set_embeddings(struct llama_context * ctx, bool embeddings);
 @ctypes_function("llama_set_embeddings", [llama_context_p_ctypes, ctypes.c_bool], None)
```
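`llama_set_embeddings` toggles a context between returning logits and returning embeddings at runtime. A minimal sketch against the low-level ctypes API (the model path is a placeholder and error handling is omitted):

```python
import llama_cpp

# Minimal sketch against the low-level ctypes API; "model.gguf" is a
# placeholder path, not a real model.
llama_cpp.llama_backend_init()
model = llama_cpp.llama_load_model_from_file(
    b"model.gguf", llama_cpp.llama_model_default_params()
)
ctx = llama_cpp.llama_new_context_with_model(
    model, llama_cpp.llama_context_default_params()
)

# Subsequent decode calls return embeddings instead of logits...
llama_cpp.llama_set_embeddings(ctx, True)
# ... and this switches the context back to logits.
llama_cpp.llama_set_embeddings(ctx, False)
```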

vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (submodule commit reference updated)
