
Commit a6b5695

Merge remote-tracking branch 'upstream/master' into custom_rope
2 parents: a728a0d + 32c5411

37 files changed: +2383 −1067 lines

.devops/full-cuda.Dockerfile

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+ARG UBUNTU_VERSION=22.04
+
+# This needs to generally match the container host's environment.
+ARG CUDA_VERSION=11.7.1
+
+# Target the CUDA build image
+ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_CUDA_DEV_CONTAINER} as build
+
+# Unless otherwise specified, we make a fat build.
+ARG CUDA_DOCKER_ARCH=all
+
+RUN apt-get update && \
+    apt-get install -y build-essential python3 python3-pip
+
+COPY requirements.txt requirements.txt
+
+RUN pip install --upgrade pip setuptools wheel \
+    && pip install -r requirements.txt
+
+WORKDIR /app
+
+COPY . .
+
+# Set nvcc architecture
+ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
+# Enable cuBLAS
+ENV LLAMA_CUBLAS=1
+
+RUN make
+
+ENTRYPOINT ["/app/.devops/tools.sh"]

.devops/main-cuda.Dockerfile

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG CUDA_VERSION=11.7.1
+# Target the CUDA build image
+ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the CUDA runtime image
+ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_CUDA_DEV_CONTAINER} as build
+
+# Unless otherwise specified, we make a fat build.
+ARG CUDA_DOCKER_ARCH=all
+
+RUN apt-get update && \
+    apt-get install -y build-essential
+
+WORKDIR /app
+
+COPY . .
+
+# Set nvcc architecture
+ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
+# Enable cuBLAS
+ENV LLAMA_CUBLAS=1
+
+RUN make
+
+FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
+
+COPY --from=build /app/main /main
+
+ENTRYPOINT [ "/main" ]

.devops/tools.sh

Lines changed: 11 additions & 7 deletions
@@ -10,13 +10,13 @@ shift
 # Join the remaining arguments into a single string
 arg2="$@"
 
-if [[ $arg1 == '--convert' || $arg1 == '-c' ]]; then
-    python3 ./convert.py $arg2
-elif [[ $arg1 == '--quantize' || $arg1 == '-q' ]]; then
-    ./quantize $arg2
-elif [[ $arg1 == '--run' || $arg1 == '-r' ]]; then
-    ./main $arg2
-elif [[ $arg1 == '--all-in-one' || $arg1 == '-a' ]]; then
+if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
+    python3 ./convert.py "$arg2"
+elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
+    ./quantize "$arg2"
+elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
+    ./main "$arg2"
+elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
     echo "Converting PTH to GGML..."
     for i in `ls $1/$2/ggml-model-f16.bin*`; do
         if [ -f "${i/f16/q4_0}" ]; then
@@ -26,6 +26,8 @@ elif [[ $arg1 == '--all-in-one' || $arg1 == '-a' ]]; then
             ./quantize "$i" "${i/f16/q4_0}" q4_0
         fi
     done
+elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
+    ./server "$arg2"
 else
     echo "Unknown command: $arg1"
     echo "Available commands: "
@@ -37,4 +39,6 @@ else
     echo "  ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
     echo "  --all-in-one (-a): Execute --convert & --quantize"
     echo "  ex: \"/models/\" 7B"
+    echo "  --server (-s): Run a model on the server"
+    echo "  ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
 fi
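
The new --server branch simply forwards the remaining arguments to the server binary produced by make. A possible invocation through the container entrypoint, as a sketch only (image tag and host paths are assumptions):

    docker run --gpus all -p 8080:8080 -v /path/to/models:/models local/llama.cpp:full-cuda \
        --server -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 --host 0.0.0.0 --port 8080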

.github/workflows/build.yml

Lines changed: 42 additions & 4 deletions
@@ -16,7 +16,10 @@ on:
     paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
 
 env:
-  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+  GGML_NLOOP: 3
+  GGML_NITER: 1
+  GGML_N_THREADS: 1
 
 jobs:
   ubuntu-focal-make:
@@ -64,7 +67,7 @@ jobs:
         id: cmake_test
         run: |
           cd build
-          ctest --verbose
+          ctest --verbose --timeout 900
 
   ubuntu-latest-cmake-sanitizer:
     runs-on: ubuntu-latest
@@ -95,6 +98,40 @@ jobs:
           cmake .. -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
          cmake --build . --config ${{ matrix.build_type }}
 
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          ctest --verbose --timeout 900
+
+  ubuntu-latest-cmake-mpi:
+    runs-on: ubuntu-latest
+
+    continue-on-error: true
+
+    strategy:
+      matrix:
+        mpi_library: [mpich, libopenmpi-dev]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v1
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ${{ matrix.mpi_library }}
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake -DLLAMA_MPI=ON ..
+          cmake --build . --config Release
+
       - name: Test
         id: cmake_test
         run: |
@@ -147,10 +184,11 @@ jobs:
         id: cmake_test
         run: |
           cd build
-          ctest --verbose
+          ctest --verbose --timeout 900
 
   windows-latest-cmake:
     runs-on: windows-latest
+
     env:
       OPENBLAS_VERSION: 0.3.23
       OPENCL_VERSION: 2023.04.17
@@ -249,7 +287,7 @@ jobs:
       if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # Test AVX-512 only when possible
       run: |
         cd build
-        ctest -C Release --verbose
+        ctest -C Release --verbose --timeout 900
 
     - name: Get commit hash
       id: commit
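
To mirror locally roughly what the new MPI job does, something like the following should work; the assumption (not stated in the diff itself) is that the ggml test binaries read GGML_NLOOP, GGML_NITER, and GGML_N_THREADS from the environment, which is why the workflow now exports them:

    sudo apt-get install build-essential libopenmpi-dev   # or mpich
    mkdir -p build && cd build
    cmake -DLLAMA_MPI=ON ..
    cmake --build . --config Release
    GGML_NLOOP=3 GGML_NITER=1 GGML_N_THREADS=1 ctest --verbose --timeout 900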

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@ build-static/
 build-cublas/
 build-opencl/
 build-metal/
+build-mpi/
 build-no-accel/
 build-sanitize-addr/
 build-sanitize-thread/

CMakeLists.txt

Lines changed: 28 additions & 1 deletion
@@ -75,6 +75,7 @@ option(LLAMA_CUDA_DMMV_F16 "llama: use 16 bit floats for dmmv
 set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
 option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
 option(LLAMA_METAL "llama: use Metal" OFF)
+option(LLAMA_MPI "llama: use MPI" OFF)
 option(LLAMA_K_QUANTS "llama: use k-quants" ON)
 option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
 
@@ -217,6 +218,9 @@ if (LLAMA_BLAS)
         message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
         add_compile_options(${BLAS_LINKER_FLAGS})
         add_compile_definitions(GGML_USE_OPENBLAS)
+        if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
+            add_compile_definitions(GGML_BLAS_USE_MKL)
+        endif()
         set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES})
         set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
 
@@ -268,7 +272,7 @@ if (LLAMA_CUBLAS)
 
     if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
         if (LLAMA_CUDA_DMMV_F16)
-            set(CMAKE_CUDA_ARCHITECTURES "61") # needed for f16 CUDA intrinsics
+            set(CMAKE_CUDA_ARCHITECTURES "60;61") # needed for f16 CUDA intrinsics
         else()
             set(CMAKE_CUDA_ARCHITECTURES "52;61") # lowest CUDA 12 standard + lowest for integer intrinsics
         endif()
@@ -305,6 +309,28 @@ if (LLAMA_METAL)
     )
 endif()
 
+if (LLAMA_MPI)
+    cmake_minimum_required(VERSION 3.10)
+    find_package(MPI)
+    if (MPI_C_FOUND)
+        message(STATUS "MPI found")
+        set(GGML_SOURCES_MPI ggml-mpi.c ggml-mpi.h)
+        add_compile_definitions(GGML_USE_MPI)
+        add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
+        set(cxx_flags ${cxx_flags} -Wno-cast-qual)
+        set(c_flags ${c_flags} -Wno-cast-qual)
+        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES})
+        set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS})
+        # Even if you're only using the C header, C++ programs may bring in MPI
+        # C++ functions, so more linkage is needed
+        if (MPI_CXX_FOUND)
+            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_CXX_LIBRARIES})
+        endif()
+    else()
+        message(WARNING "MPI not found")
+    endif()
+endif()
+
 if (LLAMA_CLBLAST)
     find_package(CLBlast)
     if (CLBlast_FOUND)
@@ -473,6 +499,7 @@ add_library(ggml OBJECT
             ${GGML_SOURCES_CUDA}
             ${GGML_SOURCES_OPENCL}
             ${GGML_SOURCES_METAL}
+            ${GGML_SOURCES_MPI}
            ${GGML_SOURCES_EXTRA}
             )
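
A sketch of a CMake-based MPI build and a two-process run; the build directory name matches the new .gitignore entry, while the hostfile, model path, and process count are purely illustrative:

    mkdir -p build-mpi && cd build-mpi
    cmake -DLLAMA_MPI=ON ..
    cmake --build . --config Release
    mpirun -np 2 --hostfile ../hostfile ./bin/main -m /models/7B/ggml-model-q4_0.bin -p "Hello" -n 64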

Makefile

Lines changed: 16 additions & 1 deletion
@@ -147,6 +147,15 @@ ifndef LLAMA_NO_ACCELERATE
 	endif
 endif # LLAMA_NO_ACCELERATE
 
+ifdef LLAMA_MPI
+	CFLAGS += -DGGML_USE_MPI -Wno-cast-qual
+	CXXFLAGS += -DGGML_USE_MPI -Wno-cast-qual
+	OBJS += ggml-mpi.o
+
+ggml-mpi.o: ggml-mpi.c ggml-mpi.h
+	$(CC) $(CFLAGS) -c $< -o $@
+endif # LLAMA_MPI
+
 ifdef LLAMA_OPENBLAS
 	CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas -I/usr/include/openblas
 	LDFLAGS += -lopenblas
@@ -163,7 +172,12 @@ ifdef LLAMA_CUBLAS
 	LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
 	OBJS += ggml-cuda.o
 	NVCC = nvcc
-	NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native
+	NVCCFLAGS = --forward-unknown-to-host-compiler
+ifdef CUDA_DOCKER_ARCH
+	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
+else
+	NVCCFLAGS += -arch=native
+endif # CUDA_DOCKER_ARCH
 ifdef LLAMA_CUDA_FORCE_DMMV
 	NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
 endif # LLAMA_CUDA_FORCE_DMMV
@@ -187,6 +201,7 @@ ifdef LLAMA_CUDA_KQUANTS_ITER
 else
 	NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
 endif
+
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
 	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@
 endif # LLAMA_CUBLAS
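
Two make-based builds that exercise the new blocks, as a sketch; using the mpicc/mpicxx compiler wrappers is the conventional way to pick up MPI headers and libraries, and the CUDA_DOCKER_ARCH value below just mirrors the Dockerfile default rather than anything mandated by this diff:

    # MPI build: defines GGML_USE_MPI and links in ggml-mpi.o.
    make clean && make CC=mpicc CXX=mpicxx LLAMA_MPI=1

    # cuBLAS build inside a container, pinning nvcc's target instead of -arch=native.
    make clean && make LLAMA_CUBLAS=1 CUDA_DOCKER_ARCH=all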
