
Commit d359f30

llama : remove MPI backend (#7395)
1 parent 1ea2a00 commit d359f30
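
For anyone tracking this change downstream: builds that previously enabled the backend with `-DLLAMA_MPI=ON` (CMake) or `LLAMA_MPI=1` (make) should simply drop the flag, since the option no longer exists. A minimal sketch, based on the commands removed from the CI workflow and README below:

```bash
# Previously (removed in this commit):
#   cmake -DLLAMA_MPI=ON ..
# The option is no longer declared; configure and build without it:
mkdir build && cd build
cmake ..
cmake --build . --config Release -j "$(nproc)"
```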

File tree

9 files changed: +2 −425 lines

.devops/nix/package.nix

Lines changed: 0 additions & 1 deletion
@@ -214,7 +214,6 @@ effectiveStdenv.mkDerivation (
       (cmakeBool "LLAMA_CUDA" useCuda)
       (cmakeBool "LLAMA_HIPBLAS" useRocm)
       (cmakeBool "LLAMA_METAL" useMetalKit)
-      (cmakeBool "LLAMA_MPI" useMpi)
       (cmakeBool "LLAMA_VULKAN" useVulkan)
       (cmakeBool "LLAMA_STATIC" enableStatic)
     ]

.github/workflows/build.yml

Lines changed: 0 additions & 34 deletions
@@ -306,40 +306,6 @@ jobs:
           cd build
           ctest -L main --verbose --timeout 900

-  ubuntu-latest-cmake-mpi:
-    runs-on: ubuntu-latest
-
-    continue-on-error: true
-
-    strategy:
-      matrix:
-        mpi_library: [mpich, libopenmpi-dev]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Dependencies
-        id: depends
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ${{ matrix.mpi_library }}
-
-      - name: Build
-        id: cmake_build
-        run: |
-          mkdir build
-          cd build
-          cmake -DLLAMA_MPI=ON ..
-          cmake --build . --config Release -j $(nproc)
-
-      - name: Test
-        id: cmake_test
-        run: |
-          cd build
-          ctest -L main --verbose
-
   ubuntu-latest-cmake-rpc:
     runs-on: ubuntu-latest

CMakeLists.txt

Lines changed: 1 addition & 32 deletions
@@ -122,7 +122,6 @@ set(LLAMA_METAL_MACOSX_VERSION_MIN "" CACHE STRING
                                        "llama: metal minimum macOS version")
 set(LLAMA_METAL_STD "" CACHE STRING "llama: metal standard version (-std flag)")
 option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
-option(LLAMA_MPI "llama: use MPI" OFF)
 option(LLAMA_RPC "llama: use RPC" OFF)
 option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
 option(LLAMA_SYCL "llama: use SYCL" OFF)

@@ -466,35 +465,6 @@ if (LLAMA_CUDA)
     endif()
 endif()

-if (LLAMA_MPI)
-    cmake_minimum_required(VERSION 3.10)
-    find_package(MPI)
-    if (MPI_C_FOUND)
-        message(STATUS "MPI found")
-
-        set(GGML_HEADERS_MPI ggml-mpi.h)
-        set(GGML_SOURCES_MPI ggml-mpi.c)
-
-        add_compile_definitions(GGML_USE_MPI)
-        add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
-
-        if (NOT MSVC)
-            add_compile_options(-Wno-cast-qual)
-        endif()
-
-        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES})
-        set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS})
-
-        # Even if you're only using the C header, C++ programs may bring in MPI
-        # C++ functions, so more linkage is needed
-        if (MPI_CXX_FOUND)
-            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_CXX_LIBRARIES})
-        endif()
-    else()
-        message(WARNING "MPI not found")
-    endif()
-endif()
-
 if (LLAMA_RPC)
     add_compile_definitions(GGML_USE_RPC)

@@ -1218,7 +1188,6 @@ add_library(ggml OBJECT
             ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
             ${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
             ${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
-            ${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
             ${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC}
             ${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
             ${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}

@@ -1306,7 +1275,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake

 set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
     "${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
-    "${GGML_HEADERS_METAL}" "${GGML_HEADERS_MPI}" "${GGML_HEADERS_EXTRA}")
+    "${GGML_HEADERS_METAL}" "${GGML_HEADERS_EXTRA}")

 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
 install(TARGETS ggml PUBLIC_HEADER)
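
Since `LLAMA_MPI` is no longer declared as an option, an existing build directory configured with the old flag just carries a stale, unused cache entry. A hedged cleanup sketch (assuming an in-tree `build/` directory):

```bash
# Sketch: remove the now-unused LLAMA_MPI entry from the CMake cache and
# reconfigure. Leaving it in place is harmless; CMake simply ignores it.
cmake -U LLAMA_MPI -S . -B build
cmake --build build --config Release -j "$(nproc)"
```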

Makefile

Lines changed: 0 additions & 12 deletions
@@ -399,13 +399,6 @@ ifndef LLAMA_NO_ACCELERATE
     endif
 endif # LLAMA_NO_ACCELERATE

-ifdef LLAMA_MPI
-    MK_CPPFLAGS += -DGGML_USE_MPI
-    MK_CFLAGS   += -Wno-cast-qual
-    MK_CXXFLAGS += -Wno-cast-qual
-    OBJS        += ggml-mpi.o
-endif # LLAMA_MPI
-
 ifdef LLAMA_OPENBLAS
     MK_CPPFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags-only-I openblas)
     MK_CFLAGS   += $(shell pkg-config --cflags-only-other openblas)

@@ -629,11 +622,6 @@ ggml-metal-embed.o: ggml-metal.metal ggml-common.h
     endif
 endif # LLAMA_METAL

-ifdef LLAMA_MPI
-ggml-mpi.o: ggml-mpi.c ggml-mpi.h
-	$(CC) $(CFLAGS) -c $< -o $@
-endif # LLAMA_MPI
-
 ifndef LLAMA_NO_LLAMAFILE
 sgemm.o: sgemm.cpp sgemm.h ggml.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
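
With the MPI rules gone from the Makefile, the MPI compiler override that the old README recommended is no longer needed, and setting `LLAMA_MPI=1` has no effect. A short sketch of the equivalent plain build:

```bash
# Previously (removed): make CC=mpicc CXX=mpicxx LLAMA_MPI=1
# After this commit a standard build is simply:
make -j "$(nproc)"
```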

README.md

Lines changed: 0 additions & 39 deletions
@@ -382,45 +382,6 @@ To disable the Metal build at compile time use the `LLAMA_NO_METAL=1` flag or th
 When built with Metal support, you can explicitly disable GPU inference with the `--n-gpu-layers|-ngl 0` command-line
 argument.

-### MPI Build
-
-MPI lets you distribute the computation over a cluster of machines. Because of the serial nature of LLM prediction, this won't yield any end-to-end speed-ups, but it will let you run larger models than would otherwise fit into RAM on a single machine.
-
-First you will need MPI libraries installed on your system. The two most popular (only?) options are [MPICH](https://www.mpich.org) and [OpenMPI](https://www.open-mpi.org). Either can be installed with a package manager (`apt`, Homebrew, MacPorts, etc).
-
-Next you will need to build the project with `LLAMA_MPI` set to true on all machines; if you're building with `make`, you will also need to specify an MPI-capable compiler (when building with CMake, this is configured automatically):
-
-- Using `make`:
-
-  ```bash
-  make CC=mpicc CXX=mpicxx LLAMA_MPI=1
-  ```
-
-- Using `CMake`:
-
-  ```bash
-  cmake -S . -B build -DLLAMA_MPI=ON
-  ```
-
-Once the programs are built, download/convert the weights on all of the machines in your cluster. The paths to the weights and programs should be identical on all machines.
-
-Next, ensure password-less SSH access to each machine from the primary host, and create a `hostfile` with a list of the hostnames and their relative "weights" (slots). If you want to use localhost for computation, use its local subnet IP address rather than the loopback address or "localhost".
-
-Here is an example hostfile:
-
-```
-192.168.0.1:2
-malvolio.local:1
-```
-
-The above will distribute the computation across 2 processes on the first host and 1 process on the second host. Each process will use roughly an equal amount of RAM. Try to keep these numbers small, as inter-process (intra-host) communication is expensive.
-
-Finally, you're ready to run a computation using `mpirun`:
-
-```bash
-mpirun -hostfile hostfile -n 3 ./main -m ./models/7B/ggml-model-q4_0.gguf -n 128
-```
-
 ### BLAS Build

 Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). Support with CPU-only BLAS implementations doesn't affect the normal generation performance. We may see generation performance improvements with GPU-involved BLAS implementations, e.g. cuBLAS, hipBLAS and CLBlast. There are currently several different BLAS implementations available for build and use:
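
With the MPI section above removed, distributed runs via `mpirun` are no longer documented or supported; the same invocation now runs on a single machine. A sketch reusing the model path from the removed example:

```bash
# Previously wrapped as: mpirun -hostfile hostfile -n 3 ./main ...
./main -m ./models/7B/ggml-model-q4_0.gguf -n 128
```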

ggml-mpi.c

Lines changed: 0 additions & 216 deletions
This file was deleted.
