Skip to content

Commit 1875733

Browse files
committed
Merge remote-tracking branch 'origin/master' into feature/online-flow
2 parents 74d660a + ae8de6d commit 1875733

File tree

216 files changed

+19876
-18305
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

216 files changed

+19876
-18305
lines changed

.devops/llama-cli-cuda.Dockerfile

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,16 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
2323
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
2424
fi && \
2525
cmake -B build -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
26-
cmake --build build --config Release --target llama-cli -j$(nproc)
26+
cmake --build build --config Release --target llama-cli -j$(nproc) && \
27+
mkdir -p /app/lib && \
28+
find build -name "*.so" -exec cp {} /app/lib \;
2729

2830
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
2931

3032
RUN apt-get update && \
3133
apt-get install -y libgomp1
3234

33-
COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
34-
COPY --from=build /app/build/src/libllama.so /libllama.so
35-
COPY --from=build /app/build/bin/llama-cli /llama-cli
35+
COPY --from=build /app/lib/ /
36+
COPY --from=build /app/build/bin/llama-cli /
3637

3738
ENTRYPOINT [ "/llama-cli" ]

.devops/llama-cli-musa.Dockerfile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,16 @@ WORKDIR /app
1616
COPY . .
1717

1818
RUN cmake -B build -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
19-
cmake --build build --config Release --target llama-cli -j$(nproc)
19+
cmake --build build --config Release --target llama-cli -j$(nproc) && \
20+
mkdir -p /app/lib && \
21+
find build -name "*.so" -exec cp {} /app/lib \;
2022

2123
FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
2224

2325
RUN apt-get update && \
2426
apt-get install -y libgomp1
2527

26-
COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
27-
COPY --from=build /app/build/src/libllama.so /libllama.so
28+
COPY --from=build /app/lib/ /
2829
COPY --from=build /app/build/bin/llama-cli /llama-cli
2930

3031
ENTRYPOINT [ "/llama-cli" ]

.devops/llama-server-cuda.Dockerfile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,16 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
2323
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
2424
fi && \
2525
cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
26-
cmake --build build --config Release --target llama-server -j$(nproc)
26+
cmake --build build --config Release --target llama-server -j$(nproc) && \
27+
mkdir -p /app/lib && \
28+
find build -name "*.so" -exec cp {} /app/lib \;
2729

2830
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
2931

3032
RUN apt-get update && \
3133
apt-get install -y libcurl4-openssl-dev libgomp1 curl
3234

33-
COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
34-
COPY --from=build /app/build/src/libllama.so /libllama.so
35+
COPY --from=build /app/lib/ /
3536
COPY --from=build /app/build/bin/llama-server /llama-server
3637

3738
# Must be set to 0.0.0.0 so it can listen to requests from host machine

.devops/llama-server-musa.Dockerfile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,16 @@ WORKDIR /app
1616
COPY . .
1717

1818
RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
19-
cmake --build build --config Release --target llama-server -j$(nproc)
19+
cmake --build build --config Release --target llama-server -j$(nproc) && \
20+
mkdir -p /app/lib && \
21+
find build -name "*.so" -exec cp {} /app/lib \;
2022

2123
FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
2224

2325
RUN apt-get update && \
2426
apt-get install -y libcurl4-openssl-dev libgomp1 curl
2527

26-
COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
27-
COPY --from=build /app/build/src/libllama.so /libllama.so
28+
COPY --from=build /app/lib/ /
2829
COPY --from=build /app/build/bin/llama-server /llama-server
2930

3031
# Must be set to 0.0.0.0 so it can listen to requests from host machine

.devops/nix/package.nix

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,9 @@ effectiveStdenv.mkDerivation (finalAttrs: {
126126
};
127127

128128
postPatch = ''
129-
substituteInPlace ./ggml/src/ggml-metal.m \
129+
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
130130
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
131-
substituteInPlace ./ggml/src/ggml-metal.m \
131+
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
132132
--replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
133133
'';
134134

@@ -173,7 +173,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
173173
(cmakeBool "GGML_NATIVE" false)
174174
(cmakeBool "GGML_BLAS" useBlas)
175175
(cmakeBool "GGML_CUDA" useCuda)
176-
(cmakeBool "GGML_HIPBLAS" useRocm)
176+
(cmakeBool "GGML_HIP" useRocm)
177177
(cmakeBool "GGML_METAL" useMetalKit)
178178
(cmakeBool "GGML_VULKAN" useVulkan)
179179
(cmakeBool "GGML_STATIC" enableStatic)

.github/workflows/build.yml

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,13 @@ jobs:
5555
sysctl -a
5656
mkdir build
5757
cd build
58-
cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF ..
58+
cmake .. \
59+
-DLLAMA_FATAL_WARNINGS=ON \
60+
-DLLAMA_CURL=ON \
61+
-DGGML_METAL_USE_BF16=ON \
62+
-DGGML_METAL_EMBED_LIBRARY=ON \
63+
-DGGML_RPC=ON \
64+
-DBUILD_SHARED_LIBS=OFF
5965
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
6066
6167
- name: Test
@@ -113,7 +119,12 @@ jobs:
113119
sysctl -a
114120
# Metal is disabled due to intermittent failures with Github runners not having a GPU:
115121
# https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
116-
cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
122+
cmake -B build \
123+
-DLLAMA_FATAL_WARNINGS=ON \
124+
-DLLAMA_CURL=ON \
125+
-DGGML_METAL=OFF \
126+
-DGGML_RPC=ON \
127+
-DBUILD_SHARED_LIBS=OFF
117128
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
118129
119130
- name: Test
@@ -394,13 +405,13 @@ jobs:
394405
- name: Build with native CMake HIP support
395406
id: cmake_build
396407
run: |
397-
cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIPBLAS=ON
408+
cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIP=ON
398409
cmake --build build --config Release -j $(nproc)
399410
400411
- name: Build with legacy HIP support
401412
id: cmake_build_legacy_hip
402413
run: |
403-
cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIPBLAS=ON
414+
cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIP=ON
404415
cmake --build build2 --config Release -j $(nproc)
405416
406417
ubuntu-22-cmake-sycl:
@@ -569,6 +580,7 @@ jobs:
569580
mkdir build
570581
cd build
571582
cmake -G Xcode .. \
583+
-DGGML_METAL_USE_BF16=ON \
572584
-DGGML_METAL_EMBED_LIBRARY=ON \
573585
-DLLAMA_BUILD_EXAMPLES=OFF \
574586
-DLLAMA_BUILD_TESTS=OFF \
@@ -599,6 +611,7 @@ jobs:
599611
mkdir build
600612
cd build
601613
cmake -G Xcode .. \
614+
-DGGML_METAL_USE_BF16=ON \
602615
-DGGML_METAL_EMBED_LIBRARY=ON \
603616
-DLLAMA_BUILD_EXAMPLES=OFF \
604617
-DLLAMA_BUILD_TESTS=OFF \
@@ -734,7 +747,7 @@ jobs:
734747
id: clone_kompute
735748
if: ${{ matrix.build == 'kompute-x64' }}
736749
run: |
737-
git submodule update --init ggml/src/kompute
750+
git submodule update --init ggml/src/ggml-kompute/kompute
738751
739752
- name: Download OpenBLAS
740753
id: get_openblas
@@ -1001,7 +1014,7 @@ jobs:
10011014
run: |
10021015
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
10031016
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1004-
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
1017+
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
10051018
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
10061019
10071020
windows-latest-cmake-hip-release:
@@ -1037,7 +1050,7 @@ jobs:
10371050
run: |
10381051
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
10391052
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1040-
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
1053+
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
10411054
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
10421055
md "build\bin\rocblas\library\"
10431056
cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
[submodule "kompute"]
2-
path = ggml/src/kompute
2+
path = ggml/src/ggml-kompute/kompute
33
url = https://github.com/nomic-ai/kompute.git

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,6 @@ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location o
140140
set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
141141
set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
142142

143-
144143
# At the moment some compile definitions are placed within the ggml/src
145144
# directory but not exported on the `ggml` target. This could be improved by
146145
# determining _precisely_ which defines are necessary for the llama-config

0 commit comments

Comments
 (0)