Skip to content

Commit 89f4d6e

Browse files
committed
ggml : add RPC backend
The RPC backend proxies all operations to a remote server which runs a regular backend (CPU, CUDA, Metal, etc).
1 parent 8960fe8 commit 89f4d6e

File tree

9 files changed

+841
-2
lines changed

9 files changed

+841
-2
lines changed

CMakeLists.txt

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ set(LLAMA_METAL_MACOSX_VERSION_MIN "" CACHE STRING
131131
set(LLAMA_METAL_STD "" CACHE STRING "llama: metal standard version (-std flag)")
132132
option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
133133
option(LLAMA_MPI "llama: use MPI" OFF)
134+
option(LLAMA_RPC "llama: use RPC" OFF)
134135
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
135136
option(LLAMA_SYCL "llama: use SYCL" OFF)
136137
option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
@@ -494,6 +495,45 @@ if (LLAMA_MPI)
494495
endif()
495496
endif()
496497

498+
# RPC backend (enabled by the LLAMA_RPC option): locate protobuf and gRPC,
# generate the C++ sources for ggml-rpc.proto, and publish them through
# GGML_SOURCES_RPC / GGML_HEADERS_RPC.
if (LLAMA_RPC)
499+
# CONFIG mode: use the package configuration files installed by protobuf itself.
find_package(protobuf CONFIG REQUIRED)
500+
message(STATUS "Using protobuf ${Protobuf_VERSION}")
501+
set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf)
502+
# Generator expression: expands to the path of the protoc executable target.
set(_PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>)
503+
504+
find_package(gRPC CONFIG REQUIRED)
505+
message(STATUS "Using gRPC ${gRPC_VERSION}")
506+
# Path of the protoc plugin that emits the gRPC C++ service code.
set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:gRPC::grpc_cpp_plugin>)
507+
508+
# Proto file: absolute path of ggml-rpc.proto and the directory that contains it
# (the directory is passed to protoc as its import path via -I)
509+
get_filename_component(ggml_proto "ggml-rpc.proto" ABSOLUTE)
510+
get_filename_component(ggml_proto_path "${ggml_proto}" PATH)
511+
512+
# Generated sources: outputs of --cpp_out (*.pb.*) and --grpc_out (*.grpc.pb.*),
# all written to the current binary directory
513+
set(ggml_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/ggml-rpc.pb.cc")
514+
set(ggml_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/ggml-rpc.pb.h")
515+
set(ggml_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/ggml-rpc.grpc.pb.cc")
516+
set(ggml_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/ggml-rpc.grpc.pb.h")
517+
518+
# Run protoc with the gRPC plugin to produce the four files above; the
# DEPENDS clause re-runs the command whenever the .proto file changes.
add_custom_command(
519+
OUTPUT "${ggml_proto_srcs}" "${ggml_proto_hdrs}" "${ggml_grpc_srcs}" "${ggml_grpc_hdrs}"
520+
COMMAND ${_PROTOBUF_PROTOC}
521+
ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
522+
--cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
523+
-I "${ggml_proto_path}"
524+
--plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}"
525+
"${ggml_proto}"
526+
DEPENDS "${ggml_proto}")
527+
528+
# The generated headers live in the binary directory, so add it to the include path.
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_CURRENT_BINARY_DIR})
529+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} gRPC::grpc++ gRPC::grpc gRPC::grpc++_reflection ${_PROTOBUF_LIBPROTOBUF})
530+
531+
# Directory-scoped definition: not limited to a single target.
add_compile_definitions(GGML_USE_RPC)
532+
533+
# Hand-written backend files plus the generated protobuf/gRPC files.
set(GGML_HEADERS_RPC ggml-rpc.h ${ggml_grpc_hdrs} ${ggml_proto_hdrs})
534+
set(GGML_SOURCES_RPC ggml-rpc.cpp ${ggml_grpc_srcs} ${ggml_proto_srcs})
535+
endif()
536+
497537
if (LLAMA_CLBLAST)
498538
find_package(CLBlast)
499539
if (CLBlast_FOUND)
@@ -1176,6 +1216,7 @@ add_library(ggml OBJECT
11761216
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
11771217
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
11781218
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
1219+
${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC}
11791220
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
11801221
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
11811222
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}

examples/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,7 @@ else()
4949
add_subdirectory(server)
5050
endif()
5151
add_subdirectory(export-lora)
52+
# Build the rpc example only when the RPC backend is enabled.
if (LLAMA_RPC)
53+
add_subdirectory(rpc)
54+
endif()
5255
endif()

examples/rpc/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Server executable for the RPC backend, built from rpc-server.cpp.
add_executable(rpc-server rpc-server.cpp)
2+
target_link_libraries(rpc-server PRIVATE ggml llama)

examples/rpc/rpc-server.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#include <memory>
2+
#include <string>
3+
#include <grpcpp/ext/proto_server_reflection_plugin.h>
4+
#include <grpcpp/grpcpp.h>
5+
#include <grpcpp/health_check_service_interface.h>
6+
7+
#include "ggml-rpc.h"
8+
9+
int main(int argc, char * argv[])
10+
{
11+
if (argc < 2) {
12+
fprintf(stderr, "Usage: %s <port>\n", argv[0]);
13+
return 1;
14+
}
15+
int port = std::stoi(argv[1]);
16+
std::string server_address = "0.0.0.0:" + std::to_string(port);
17+
BackendImpl service;
18+
19+
grpc::EnableDefaultHealthCheckService(true);
20+
grpc::reflection::InitProtoReflectionServerBuilderPlugin();
21+
grpc::ServerBuilder builder;
22+
// Listen on the given address without any authentication mechanism.
23+
builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
24+
builder.RegisterService(&service);
25+
builder.SetMaxSendMessageSize(1024 * 1024 * 1024);
26+
builder.SetMaxMessageSize(1024 * 1024 * 1024);
27+
builder.SetMaxReceiveMessageSize(1024 * 1024 * 1024);
28+
std::unique_ptr<grpc::Server> server(builder.BuildAndStart());
29+
std::cout << "RPC backend listening on " << server_address << std::endl;
30+
31+
// Wait for the server to shutdown. Note that some other thread must be
32+
// responsible for shutting down the server for this call to ever return.
33+
server->Wait();
34+
35+
return 0;
36+
}

0 commit comments

Comments
 (0)