
Commit d7db159

Merge branch 'master' into compilade/parallel-convert
2 parents: d8bab9e + 578754b


60 files changed: +3678 −1396 lines

.devops/cuda.Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ COPY . .
 RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
     export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
     fi && \
-    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
     cmake --build build --config Release -j$(nproc)

 RUN mkdir -p /app/lib && \

.devops/intel.Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
     && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
     fi && \
     echo "Building with dynamic libs" && \
-    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${OPT_SYCL_F16} && \
     cmake --build build --config Release -j$(nproc)

 RUN mkdir -p /app/lib && \

.devops/llama-cli-cann.Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -1,12 +1,12 @@
-ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8
+ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10

 FROM ascendai/cann:$ASCEND_VERSION AS build

 WORKDIR /app

 COPY . .

-RUN yum install -y gcc g++ cmake make
+RUN yum install -y gcc g++ cmake make libcurl-devel
 ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
 ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
 ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}

.devops/musa.Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ COPY . .
 RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
     export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
     fi && \
-    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
     cmake --build build --config Release -j$(nproc)

 RUN mkdir -p /app/lib && \

.devops/rocm.Dockerfile

Lines changed: 3 additions & 3 deletions
@@ -17,8 +17,8 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
 # gfx906 is deprecated
 #check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html

-#ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
-ARG ROCM_DOCKER_ARCH=gfx1100
+ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
+#ARG ROCM_DOCKER_ARCH=gfx1100

 # Set nvcc architectured
 ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
@@ -40,7 +40,7 @@ WORKDIR /app
 COPY . .

 RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
-    cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release \
+    cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
     && cmake --build build --config Release -j$(nproc)

 RUN mkdir -p /app/lib \

.devops/vulkan.Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ WORKDIR /app

 COPY . .

-RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
+RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
     cmake --build build --config Release -j$(nproc)

 RUN mkdir -p /app/lib && \

.github/workflows/build.yml

Lines changed: 2 additions & 2 deletions
@@ -1771,7 +1771,7 @@ jobs:
     strategy:
       matrix:
        cann:
-         - '8.0.rc3.beta1-910b-openeuler22.03-py3.10'
+         - '8.1.RC1.alpha001-910b-openeuler22.03-py3.10'
        device:
          - 'ascend910b3'
        build:
@@ -1784,7 +1784,7 @@
      - name: Dependencies
        run: |
          yum update -y
-         yum install -y git gcc gcc-c++ make cmake
+         yum install -y git gcc gcc-c++ make cmake libcurl-devel

      - name: Build
        run: |

.github/workflows/docker.yml

Lines changed: 6 additions & 6 deletions
@@ -36,13 +36,13 @@ jobs:
       matrix:
         config:
           # Multi-stage build
-          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
-          - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
-          - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
-          - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
-          - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
+          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false }
+          - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
+          - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
+          - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
+          - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
           # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
-          #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true }
+          #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true }
     steps:
       - name: Check out the repo
         uses: actions/checkout@v4

README.md

Lines changed: 5 additions & 8 deletions
@@ -9,13 +9,6 @@

 Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++

-> [!IMPORTANT]
-> New `llama.cpp` package location: [ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp/pkgs/container/llama.cpp)
->
-> Update your container URLs to: `ghcr.io/ggml-org/llama.cpp`
->
-> More info: https://github.com/ggml-org/llama.cpp/discussions/11801
-
 ## Recent API changes

 - [Changelog for `libllama` API](https://github.com/ggml-org/llama.cpp/issues/9289)
@@ -104,6 +97,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [x] [Flan T5](https://huggingface.co/models?search=flan-t5)
 - [x] [Open Elm models](https://huggingface.co/collections/apple/openelm-instruct-models-6619ad295d7ae9f868b759ca)
 - [x] [ChatGLM3-6b](https://huggingface.co/THUDM/chatglm3-6b) + [ChatGLM4-9b](https://huggingface.co/THUDM/glm-4-9b) + [GLMEdge-1.5b](https://huggingface.co/THUDM/glm-edge-1.5b-chat) + [GLMEdge-4b](https://huggingface.co/THUDM/glm-edge-4b-chat)
+- [x] [GLM-4-0414](https://huggingface.co/collections/THUDM/glm-4-0414-67f3cbcb34dd9d252707cb2e)
 - [x] [SmolLM](https://huggingface.co/collections/HuggingFaceTB/smollm-6695016cad7167254ce15966)
 - [x] [EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct)
 - [x] [FalconMamba Models](https://huggingface.co/collections/tiiuae/falconmamba-7b-66b9a580324dd1598b0f6d4a)
@@ -247,6 +241,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 | [Vulkan](docs/build.md#vulkan) | GPU |
 | [CANN](docs/build.md#cann) | Ascend NPU |
 | [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |
+| [RPC](https://github.com/ggml-org/llama.cpp/tree/master/examples/rpc) | All |

 ## Building the project

@@ -265,7 +260,9 @@ The [Hugging Face](https://huggingface.co) platform hosts a [number of LLMs](htt
 - [Trending](https://huggingface.co/models?library=gguf&sort=trending)
 - [LLaMA](https://huggingface.co/models?sort=trending&search=llama+gguf)

-You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from Hugging Face by using this CLI argument: `-hf <user>/<model>[:quant]`
+You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from [Hugging Face](https://huggingface.co/) or other model hosting sites, such as [ModelScope](https://modelscope.cn/), by using this CLI argument: `-hf <user>/<model>[:quant]`.
+
+By default, the CLI would download from Hugging Face, you can switch to other options with the environment variable `MODEL_ENDPOINT`. For example, you may opt to downloading model checkpoints from ModelScope or other model sharing communities by setting the environment variable, e.g. `MODEL_ENDPOINT=https://www.modelscope.cn/`.

 After downloading a model, use the CLI tools to run it locally - see below.


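Taken together, the README changes mean the downloader is no longer tied to Hugging Face. A minimal usage sketch — the `llama-cli` binary name and the `<user>/<model>[:quant]` placeholder are illustrative; only the `-hf` flag and the `MODEL_ENDPOINT` variable come from this commit:

    # Default: pull a GGUF model from Hugging Face
    llama-cli -hf <user>/<model>[:quant]

    # Same command, but resolve the model through ModelScope instead
    MODEL_ENDPOINT=https://www.modelscope.cn/ llama-cli -hf <user>/<model>[:quant]
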
build-xcframework.sh

Lines changed: 16 additions & 4 deletions
@@ -41,6 +41,11 @@ COMMON_CMAKE_ARGS=(
     -DGGML_OPENMP=${GGML_OPENMP}
 )

+XCODE_VERSION=$(xcodebuild -version 2>/dev/null | head -n1 | awk '{ print $2 }')
+MAJOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f1)
+MINOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f2)
+echo "Detected Xcode version: $XCODE_VERSION"
+
 check_required_tool() {
     local tool=$1
     local install_message=$2
@@ -325,21 +330,28 @@ combine_static_libraries() {

     # Platform-specific post-processing for device builds
     if [[ "$is_simulator" == "false" ]]; then
-        if command -v vtool &>/dev/null; then
+        if command -v xcrun vtool &>/dev/null; then
             case "$platform" in
                 "ios")
                     echo "Marking binary as a framework binary for iOS..."
-                    vtool -set-build-version ios ${IOS_MIN_OS_VERSION} ${IOS_MIN_OS_VERSION} -replace \
+                    xcrun vtool -set-build-version ios ${IOS_MIN_OS_VERSION} ${IOS_MIN_OS_VERSION} -replace \
                         -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
                     ;;
                 "visionos")
                     echo "Marking binary as a framework binary for visionOS..."
-                    vtool -set-build-version xros ${VISIONOS_MIN_OS_VERSION} ${VISIONOS_MIN_OS_VERSION} -replace \
+                    if [[ "$MAJOR_VERSION" -gt 16 ]] || [[ "$MAJOR_VERSION" -eq 16 && "$MINOR_VERSION" -gt 2 ]]; then
+                        echo "Xcode version greater than 16.2, using visionOS."
+                        VISION_OS_BUILD_VERSION="visionos"
+                    else
+                        echo "Xcode version less than or equal to 16.2, using xros."
+                        VISION_OS_BUILD_VERSION="xros"
+                    fi
+                    xcrun vtool -set-build-version ${VISION_OS_BUILD_VERSION} ${VISIONOS_MIN_OS_VERSION} ${VISIONOS_MIN_OS_VERSION} -replace \
                         -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
                     ;;
                 "tvos")
                     echo "Marking binary as a framework binary for tvOS..."
-                    vtool -set-build-version tvos ${TVOS_MIN_OS_VERSION} ${TVOS_MIN_OS_VERSION} -replace \
+                    xcrun vtool -set-build-version tvos ${TVOS_MIN_OS_VERSION} ${TVOS_MIN_OS_VERSION} -replace \
                         -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
                     ;;
             esac

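For reference, a rough trace of the new version probe in build-xcframework.sh, assuming `xcodebuild -version` prints `Xcode 16.3` on its first line (the sample output is hypothetical):

    XCODE_VERSION=$(xcodebuild -version 2>/dev/null | head -n1 | awk '{ print $2 }')  # -> "16.3"
    MAJOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f1)                                # -> "16"
    MINOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f2)                                # -> "3"
    # 16.3 is newer than 16.2, so the visionOS branch passes "visionos" to xcrun vtool;
    # on Xcode 16.2 or older it falls back to the legacy "xros" platform name.
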
common/arg.cpp

Lines changed: 8 additions & 9 deletions
@@ -228,12 +228,13 @@ static bool common_download_file_single(const std::string & url, const std::stri
     curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
     curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);

+    http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
     // Check if hf-token or bearer-token was specified
     if (!bearer_token.empty()) {
         std::string auth_header = "Authorization: Bearer " + bearer_token;
         http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
-        curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
     }
+    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);

 #if defined(_WIN32)
     // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
@@ -544,7 +545,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
     curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
     curl_slist_ptr http_headers;
     std::string res_str;
-    std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag;
+
+    std::string model_endpoint = get_model_endpoint();
+
+    std::string url = model_endpoint + "v2/" + hf_repo + "/manifests/" + tag;
     curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
     curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
     typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
@@ -659,13 +663,8 @@ static void common_params_handle_model(
        }
    }

-   std::string hf_endpoint = "https://huggingface.co/";
-   const char * hf_endpoint_env = getenv("HF_ENDPOINT");
-   if (hf_endpoint_env) {
-       hf_endpoint = hf_endpoint_env;
-       if (hf_endpoint.back() != '/') hf_endpoint += '/';
-   }
-   model.url = hf_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
+   std::string model_endpoint = get_model_endpoint();
+   model.url = model_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
    // make sure model path is present (for caching purposes)
    if (model.path.empty()) {
        // this is to avoid different repo having same file name, or same file name in different subdirs

common/common.cpp

Lines changed: 13 additions & 0 deletions
@@ -1027,6 +1027,19 @@ struct common_init_result common_init_from_params(common_params & params) {
     return iparams;
 }

+std::string get_model_endpoint() {
+    const char * model_endpoint_env = getenv("MODEL_ENDPOINT");
+    // We still respect the use of environment-variable "HF_ENDPOINT" for backward-compatibility.
+    const char * hf_endpoint_env = getenv("HF_ENDPOINT");
+    const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env;
+    std::string model_endpoint = "https://huggingface.co/";
+    if (endpoint_env) {
+        model_endpoint = endpoint_env;
+        if (model_endpoint.back() != '/') model_endpoint += '/';
+    }
+    return model_endpoint;
+}
+
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
     llama_clear_adapter_lora(ctx);
     for (auto & la : lora) {

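A sketch of how the new `get_model_endpoint()` helper resolves the endpoint, based on the code above (the commands and values shown are illustrative, not output from this commit):

    # MODEL_ENDPOINT wins when both variables are set; HF_ENDPOINT is kept for backward compatibility.
    MODEL_ENDPOINT=https://www.modelscope.cn HF_ENDPOINT=https://huggingface.co/ llama-cli -hf <user>/<model>
    #   -> endpoint "https://www.modelscope.cn/" (a missing trailing '/' is appended)

    # Neither variable set:
    llama-cli -hf <user>/<model>
    #   -> endpoint "https://huggingface.co/" (default)
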
common/common.h

Lines changed: 2 additions & 0 deletions
@@ -543,6 +543,8 @@ struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_p
 // clear LoRA adapters from context, then apply new list of adapters
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);

+std::string get_model_endpoint();
+
 //
 // Batch utils
 //
