Commit fdad997

docker: add support for CUDA in docker
1 parent b5c9295

File tree: 4 files changed, +97 -1 lines

.devops/full-cuda.Dockerfile

Lines changed: 33 additions & 0 deletions (new file)
```Dockerfile
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1

# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip

COPY requirements.txt requirements.txt

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable cuBLAS
ENV LLAMA_CUBLAS=1

RUN make

ENTRYPOINT ["/app/.devops/tools.sh"]
```

.devops/main-cuda.Dockerfile

Lines changed: 32 additions & 0 deletions (new file)
```Dockerfile
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable cuBLAS
ENV LLAMA_CUBLAS=1

RUN make

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

COPY --from=build /app/main /main

ENTRYPOINT [ "/main" ]
```

Makefile

Lines changed: 5 additions & 1 deletion

```diff
@@ -128,7 +128,11 @@ ifdef LLAMA_CUBLAS
 LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
 OBJS += ggml-cuda.o
 NVCC = nvcc
-NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native
+ifdef CUDA_DOCKER_ARCH
+NVCCFLAGS = --forward-unknown-to-host-compiler -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
+else
+NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native
+endif
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
 	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@
 endif
```
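The same switch works outside Docker, since `ifdef` also picks up environment variables: setting `CUDA_DOCKER_ARCH` pins the nvcc target instead of relying on `-arch=native`. A sketch, where `sm_80` is an illustrative assumption:

```bash
# Pin the nvcc architecture explicitly, e.g. when the build machine has
# no GPU and -arch=native therefore has nothing to detect.
LLAMA_CUBLAS=1 CUDA_DOCKER_ARCH=sm_80 make
```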

README.md

Lines changed: 27 additions & 0 deletions

The new section below is inserted at line 525, right after the existing light-image example:

```bash
docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512
```
### Docker With CUDA

Assuming one has the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit) properly installed on Linux, or is using a GPU-enabled cloud, `cuBLAS` should be accessible inside the container.

#### Building Locally

```bash
docker build -t local/llama.cpp:full -f .devops/full-cuda.Dockerfile .
docker build -t local/llama.cpp:light -f .devops/main-cuda.Dockerfile .
```

You may want to pass different `ARG` values, depending on the CUDA environment supported by your container host and on your GPU architecture.

The defaults are:

- `CUDA_VERSION` set to `11.7.1`
- `CUDA_DOCKER_ARCH` set to `all`

#### Usage

After building locally, usage is similar to the non-CUDA examples, but you'll need to add the `--gpus` flag. You will also want to use the `--n-gpu-layers` flag.

```bash
docker run --gpus all -v /path/to/models:/models local/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
docker run --gpus all -v /path/to/models:/models local/llama.cpp:light -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
```
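If the container cannot see the GPU, it helps to first confirm that the NVIDIA container runtime works at all, independent of llama.cpp. A minimal check (the CUDA base image tag is an assumption; use one that matches your driver):

```bash
# Should print the same GPU table as nvidia-smi on the host; if this
# fails, the problem is the nvidia-container-toolkit setup, not llama.cpp.
docker run --rm --gpus all nvidia/cuda:11.7.1-base-ubuntu22.04 nvidia-smi
```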
The diff ends against unchanged context:

### Contributing

- Contributors can open PRs
