Skip to content

Commit 66b039a

Browse files
authored
docker : update CUDA images (#9213)
1 parent 20f1789 commit 66b039a

File tree

4 files changed

+42
-42
lines changed

4 files changed

+42
-42
lines changed

.devops/full-cuda.Dockerfile

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,16 @@
11
ARG UBUNTU_VERSION=22.04
2-
32
# This needs to generally match the container host's environment.
4-
ARG CUDA_VERSION=11.7.1
5-
3+
ARG CUDA_VERSION=12.6.0
64
# Target the CUDA build image
75
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
86

97
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
108

11-
# Unless otherwise specified, we make a fat build.
12-
ARG CUDA_DOCKER_ARCH=all
9+
# CUDA architecture to build for (defaults to all supported archs)
10+
ARG CUDA_DOCKER_ARCH=default
1311

1412
RUN apt-get update && \
15-
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
13+
apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
1614

1715
COPY requirements.txt requirements.txt
1816
COPY requirements requirements
@@ -24,13 +22,12 @@ WORKDIR /app
2422

2523
COPY . .
2624

27-
# Set nvcc architecture
28-
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
29-
# Enable CUDA
30-
ENV GGML_CUDA=1
31-
# Enable cURL
32-
ENV LLAMA_CURL=1
33-
34-
RUN make -j$(nproc)
25+
# Use the default CUDA archs if not specified
26+
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
27+
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
28+
fi && \
29+
cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
30+
cmake --build build --config Release -j$(nproc) && \
31+
cp build/bin/* .
3532

3633
ENTRYPOINT ["/app/.devops/tools.sh"]

.devops/llama-cli-cuda.Dockerfile

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,37 @@
11
ARG UBUNTU_VERSION=22.04
22
# This needs to generally match the container host's environment.
3-
ARG CUDA_VERSION=11.7.1
3+
ARG CUDA_VERSION=12.6.0
44
# Target the CUDA build image
55
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
66
# Target the CUDA runtime image
77
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
88

99
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
1010

11-
# Unless otherwise specified, we make a fat build.
12-
ARG CUDA_DOCKER_ARCH=all
11+
# CUDA architecture to build for (defaults to all supported archs)
12+
ARG CUDA_DOCKER_ARCH=default
1313

1414
RUN apt-get update && \
15-
apt-get install -y build-essential git
15+
apt-get install -y build-essential git cmake
1616

1717
WORKDIR /app
1818

1919
COPY . .
2020

21-
# Set nvcc architecture
22-
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
23-
# Enable CUDA
24-
ENV GGML_CUDA=1
25-
26-
RUN make -j$(nproc) llama-cli
21+
# Use the default CUDA archs if not specified
22+
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
23+
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
24+
fi && \
25+
cmake -B build -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
26+
cmake --build build --config Release --target llama-cli -j$(nproc)
2727

2828
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
2929

3030
RUN apt-get update && \
3131
apt-get install -y libgomp1
3232

33-
COPY --from=build /app/llama-cli /llama-cli
33+
COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
34+
COPY --from=build /app/build/src/libllama.so /libllama.so
35+
COPY --from=build /app/build/bin/llama-cli /llama-cli
3436

3537
ENTRYPOINT [ "/llama-cli" ]

.devops/llama-server-cuda.Dockerfile

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,41 @@
11
ARG UBUNTU_VERSION=22.04
22
# This needs to generally match the container host's environment.
3-
ARG CUDA_VERSION=11.7.1
3+
ARG CUDA_VERSION=12.6.0
44
# Target the CUDA build image
55
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
66
# Target the CUDA runtime image
77
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
88

99
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
1010

11-
# Unless otherwise specified, we make a fat build.
12-
ARG CUDA_DOCKER_ARCH=all
11+
# CUDA architecture to build for (defaults to all supported archs)
12+
ARG CUDA_DOCKER_ARCH=default
1313

1414
RUN apt-get update && \
15-
apt-get install -y build-essential git libcurl4-openssl-dev
15+
apt-get install -y build-essential git cmake libcurl4-openssl-dev
1616

1717
WORKDIR /app
1818

1919
COPY . .
2020

21-
# Set nvcc architecture
22-
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
23-
# Enable CUDA
24-
ENV GGML_CUDA=1
25-
# Enable cURL
26-
ENV LLAMA_CURL=1
27-
# Must be set to 0.0.0.0 so it can listen to requests from host machine
28-
ENV LLAMA_ARG_HOST=0.0.0.0
29-
30-
RUN make -j$(nproc) llama-server
21+
# Use the default CUDA archs if not specified
22+
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
23+
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
24+
fi && \
25+
cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
26+
cmake --build build --config Release --target llama-server -j$(nproc)
3127

3228
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
3329

3430
RUN apt-get update && \
3531
apt-get install -y libcurl4-openssl-dev libgomp1 curl
3632

37-
COPY --from=build /app/llama-server /llama-server
33+
COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
34+
COPY --from=build /app/build/src/libllama.so /libllama.so
35+
COPY --from=build /app/build/bin/llama-server /llama-server
36+
37+
# Must be set to 0.0.0.0 so it can listen to requests from host machine
38+
ENV LLAMA_ARG_HOST=0.0.0.0
3839

3940
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
4041

docs/docker.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ You may want to pass in some different `ARGS`, depending on the CUDA environment
6666

6767
The defaults are:
6868

69-
- `CUDA_VERSION` set to `11.7.1`
70-
- `CUDA_DOCKER_ARCH` set to `all`
69+
- `CUDA_VERSION` set to `12.6.0`
70+
- `CUDA_DOCKER_ARCH` set to the cmake build default, which includes all the supported architectures
7171

7272
The resulting images are essentially the same as the non-CUDA images:
7373

0 commit comments

Comments
 (0)