
Commit 117cbb2

feat: Update llama.cpp

1 parent 04959f1

File tree

3 files changed: 44 additions and 34 deletions

CMakeLists.txt
llama_cpp/llama_cpp.py
vendor/llama.cpp

CMakeLists.txt

Lines changed: 37 additions & 32 deletions
```diff
@@ -5,9 +5,28 @@ project(llama_cpp)
 option(LLAMA_BUILD "Build llama.cpp shared library and install alongside python package" ON)
 option(LLAVA_BUILD "Build llava shared library and install alongside python package" ON)
 
+if(SKBUILD_STATE STREQUAL "editable")
+    # Install into the source directory
+    # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
+    set(LLAMA_CPP_PYTHON_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp)
+else()
+    set(LLAMA_CPP_PYTHON_INSTALL_DIR ${SKBUILD_PLATLIB_DIR}/llama_cpp)
+endif()
+
 if (LLAMA_BUILD)
     set(BUILD_SHARED_LIBS "On")
 
+    set(CMAKE_SKIP_BUILD_RPATH FALSE)
+
+    # When building, don't use the install RPATH already
+    # (but later on when installing)
+    set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
+
+    # Add the automatically determined parts of the RPATH
+    # which point to directories outside the build tree to the install RPATH
+    set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+    set(CMAKE_INSTALL_RPATH "${LLAMA_CPP_PYTHON_INSTALL_DIR}")
+
     # Building llama
     if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
         # Need to disable these llama.cpp flags on Apple x86_64,
```
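The new `LLAMA_CPP_PYTHON_INSTALL_DIR` variable picks the destination once: editable installs go straight into the source tree's `llama_cpp/` package, regular builds into the scikit-build platlib directory. Either way the shared library ends up next to the Python sources, which is what the package's import-time loader depends on. A minimal illustrative sketch of that lookup (a hypothetical helper, not the package's actual loader code):

```python
import ctypes
import pathlib
import sys

def load_libllama() -> ctypes.CDLL:
    """Hypothetical loader sketch, not the package's actual code."""
    # The CMake change above installs the built library next to the Python
    # sources in both editable and regular installs, so searching the
    # package directory finds it in either mode.
    pkg_dir = pathlib.Path(__file__).parent
    suffix = {"darwin": ".dylib", "win32": ".dll"}.get(sys.platform, ".so")
    for candidate in sorted(pkg_dir.glob(f"*llama*{suffix}")):
        try:
            return ctypes.CDLL(str(candidate))
        except OSError:
            continue
    raise FileNotFoundError(f"no llama shared library found in {pkg_dir}")
```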
```diff
@@ -24,31 +43,26 @@ if (LLAMA_BUILD)
 
     add_subdirectory(vendor/llama.cpp)
     install(
-        TARGETS llama
-        LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
+        TARGETS llama
+        LIBRARY DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        RUNTIME DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        ARCHIVE DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        FRAMEWORK DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        RESOURCE DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
     )
-    # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
     install(
-        TARGETS llama
-        LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
+        TARGETS ggml
+        LIBRARY DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        RUNTIME DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        ARCHIVE DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        FRAMEWORK DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        RESOURCE DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
     )
     # Workaround for Windows + CUDA https://github.com/abetlen/llama-cpp-python/issues/563
     if (WIN32 AND (LLAMA_CUDA OR LLAMA_CUBLAS))
         install(
             FILES $<TARGET_RUNTIME_DLLS:llama>
-            DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        )
-        install(
-            FILES $<TARGET_RUNTIME_DLLS:llama>
-            DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
+            DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
         )
     endif()
 
```
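With the updated vendored llama.cpp producing `ggml` as its own shared library, a single parameterized `install()` per target replaces the duplicated `SKBUILD_PLATLIB_DIR`/`CMAKE_CURRENT_SOURCE_DIR` pairs, and the `CMAKE_INSTALL_RPATH` set above lets the llama library resolve the ggml library from the same directory. A quick illustrative check that both binaries landed in the installed package (file names and suffixes are platform-dependent assumptions):

```python
import importlib.util
import pathlib

# Illustrative post-install check: with this commit the llama_cpp package
# directory should contain both the llama and ggml shared libraries.
spec = importlib.util.find_spec("llama_cpp")
pkg_dir = pathlib.Path(spec.origin).parent
shared_libs = sorted(
    p.name for p in pkg_dir.iterdir() if p.suffix in (".so", ".dylib", ".dll")
)
print(shared_libs)  # e.g. ['libggml.so', 'libllama.so'] on Linux (names assumed)
```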
```diff
@@ -71,20 +85,11 @@ if (LLAMA_BUILD)
     endif()
     install(
         TARGETS llava_shared
-        LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-    )
-    # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
-    install(
-        TARGETS llava_shared
-        LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
+        LIBRARY DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        RUNTIME DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        ARCHIVE DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        FRAMEWORK DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
+        RESOURCE DESTINATION ${LLAMA_CPP_PYTHON_INSTALL_DIR}
     )
     endif()
 endif()
```

llama_cpp/llama_cpp.py

Lines changed: 6 additions & 1 deletion
```diff
@@ -273,6 +273,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 # LLAMA_VOCAB_TYPE_SPM = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
 # LLAMA_VOCAB_TYPE_BPE = 2, // GPT-2 tokenizer based on byte-level BPE
 # LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece
+# LLAMA_VOCAB_TYPE_UGM = 4, // T5 tokenizer based on Unigram
 # };
 LLAMA_VOCAB_TYPE_NONE = 0
 """For models without vocab"""
```
```diff
@@ -282,6 +283,8 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 """GPT-2 tokenizer based on byte-level BPE"""
 LLAMA_VOCAB_TYPE_WPM = 3
 """BERT tokenizer based on WordPiece"""
+LLAMA_VOCAB_TYPE_UGM = 4
+"""T5 tokenizer based on Unigram"""
 
 
 # // pre-tokenization types
```
```diff
@@ -302,6 +305,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 # LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
 # LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
 # LLAMA_VOCAB_PRE_TYPE_PORO = 15,
+# LLAMA_VOCAB_PRE_TYPE_VIKING = 16,
 # };
 LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
```
```diff
@@ -319,6 +323,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 LLAMA_VOCAB_PRE_TYPE_DBRX = 13
 LLAMA_VOCAB_PRE_TYPE_SMAUG = 14
 LLAMA_VOCAB_PRE_TYPE_PORO = 15
+LLAMA_VOCAB_PRE_TYPE_VIKING = 16
 
 
 # // note: these values should be synchronized with ggml_rope
```
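The new `LLAMA_VOCAB_TYPE_UGM` and `LLAMA_VOCAB_PRE_TYPE_VIKING` values behave like the existing constants. A small illustrative sketch (the name map is ours, not part of the bindings):

```python
import llama_cpp

# Illustrative name map for the vocab types, including the new UGM entry;
# the table itself is ours, not part of the bindings.
VOCAB_TYPE_NAMES = {
    llama_cpp.LLAMA_VOCAB_TYPE_NONE: "no vocab",
    llama_cpp.LLAMA_VOCAB_TYPE_SPM: "SentencePiece (byte-level BPE with byte fallback)",
    llama_cpp.LLAMA_VOCAB_TYPE_BPE: "GPT-2 style byte-level BPE",
    llama_cpp.LLAMA_VOCAB_TYPE_WPM: "WordPiece (BERT)",
    llama_cpp.LLAMA_VOCAB_TYPE_UGM: "Unigram (T5)",
}

print(VOCAB_TYPE_NAMES[llama_cpp.LLAMA_VOCAB_TYPE_UGM])  # Unigram (T5)
```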
```diff
@@ -2317,7 +2322,7 @@ def llama_n_threads_batch(ctx: llama_context_p, /) -> int:
     ...
 
 
-# // Set whether the model is in embeddings model or not
+# // Set whether the model is in embeddings mode or not
 # // If true, embeddings will be returned but logits will not
 # LLAMA_API void llama_set_embeddings(struct llama_context * ctx, bool embeddings);
 @ctypes_function("llama_set_embeddings", [llama_context_p_ctypes, ctypes.c_bool], None)
```
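`llama_set_embeddings` toggles a context between returning logits and returning embeddings at runtime. A minimal sketch against the low-level ctypes API (the model path is a placeholder and error handling is omitted):

```python
import llama_cpp

# Minimal sketch against the low-level ctypes API; "model.gguf" is a
# placeholder path, not a real model.
llama_cpp.llama_backend_init()
model = llama_cpp.llama_load_model_from_file(
    b"model.gguf", llama_cpp.llama_model_default_params()
)
ctx = llama_cpp.llama_new_context_with_model(
    model, llama_cpp.llama_context_default_params()
)

# Subsequent decode calls return embeddings instead of logits...
llama_cpp.llama_set_embeddings(ctx, True)
# ... and this switches the context back to logits.
llama_cpp.llama_set_embeddings(ctx, False)
```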

vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (submodule commit reference updated)
