Skip to content

Commit ab6ab8f

Browse files
authored
rpc : send hash when tensor data is above some fixed threshold (#12496)
* rpc : send hash when tensor data is above some fixed threshold

  ref #10095

* rpc : put cache under $HOME/.cache/llama.cpp
* try to fix win32 build
* another try to fix win32 build
* remove llama as dependency
1 parent 2099a9d commit ab6ab8f

File tree

4 files changed

+290
-16
lines changed

4 files changed

+290
-16
lines changed

examples/rpc/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
1-
add_executable(rpc-server rpc-server.cpp)
2-
target_link_libraries(rpc-server PRIVATE ggml llama)
1+
set(TARGET rpc-server)
2+
add_executable(${TARGET} rpc-server.cpp)
3+
target_link_libraries(${TARGET} PRIVATE ggml)
4+
target_compile_features(${TARGET} PRIVATE cxx_std_17)

examples/rpc/rpc-server.cpp

Lines changed: 140 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#if defined(_MSC_VER)
2+
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
3+
#endif
4+
15
#include "ggml-cpu.h"
26

37
#ifdef GGML_USE_CUDA
@@ -18,26 +22,142 @@
1822

1923
#include "ggml-rpc.h"
2024
#ifdef _WIN32
25+
# define DIRECTORY_SEPARATOR '\\'
26+
# include <locale>
2127
# include <windows.h>
28+
# include <fcntl.h>
29+
# include <io.h>
2230
#else
31+
# define DIRECTORY_SEPARATOR '/'
2332
# include <unistd.h>
33+
# include <sys/stat.h>
2434
#endif
35+
#include <codecvt>
2536
#include <string>
2637
#include <stdio.h>
38+
#include <vector>
39+
#include <filesystem>
40+
41+
namespace fs = std::filesystem;
42+
43+
// NOTE: this is copied from common.cpp to avoid linking with libcommon
//
// Creates every directory named by a separator-terminated prefix of `path`,
// front to back. A final component that is NOT followed by a directory
// separator is not created (callers pass paths with a trailing separator).
//
// Returns true if `path` already is a directory, or if every such prefix is a
// directory when the function returns. Returns false if `path` (POSIX only) or
// any prefix exists but is not a directory, or if a directory cannot be created.
static bool fs_create_directory_with_parents(const std::string & path) {
#ifdef _WIN32
    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
    const std::wstring wpath = converter.from_bytes(path);

    // if the path already exists, check whether it's a directory
    const DWORD attributes = GetFileAttributesW(wpath.c_str());
    if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
        return true;
    }

    size_t pos_slash = 0;

    // process path from front to back, procedurally creating directories
    // NOTE: separators are searched in the wide string - byte offsets into the
    //       UTF-8 `path` do not match offsets into `wpath` for non-ASCII paths
    while ((pos_slash = wpath.find(L'\\', pos_slash)) != std::wstring::npos) {
        const std::wstring subpath = wpath.substr(0, pos_slash);
        pos_slash += 1;

        // nothing to create for an empty prefix (leading separator) or for a
        // bare drive specifier such as "C:"
        if (subpath.empty() || (subpath.length() == 2 && subpath[1] == L':')) {
            continue;
        }

        if (!CreateDirectoryW(subpath.c_str(), NULL)) {
            // the only tolerated failure is "already exists" ...
            if (GetLastError() != ERROR_ALREADY_EXISTS) {
                return false;
            }

            // ... and only if the existing entry is a directory
            const DWORD sub_attributes = GetFileAttributesW(subpath.c_str());
            if (sub_attributes == INVALID_FILE_ATTRIBUTES || !(sub_attributes & FILE_ATTRIBUTE_DIRECTORY)) {
                return false;
            }
        }
    }

    return true;
#else
    struct stat info;

    // if the path already exists, check whether it's a directory
    if (stat(path.c_str(), &info) == 0) {
        return S_ISDIR(info.st_mode);
    }

    size_t pos_slash = 1; // skip leading slashes for directory creation

    // process path from front to back, procedurally creating directories
    while ((pos_slash = path.find('/', pos_slash)) != std::string::npos) {
        const std::string subpath = path.substr(0, pos_slash);
        pos_slash += 1;

        // if the prefix already exists, ensure that it's a directory
        if (stat(subpath.c_str(), &info) == 0) {
            if (!S_ISDIR(info.st_mode)) {
                return false;
            }
            continue;
        }

        if (mkdir(subpath.c_str(), 0755) != 0) {
            // another process may have created the directory between the
            // stat() above and mkdir(); accept that, fail on anything else
            if (stat(subpath.c_str(), &info) != 0 || !S_ISDIR(info.st_mode)) {
                return false;
            }
        }
    }

    return true;
#endif // _WIN32
}
115+
116+
// NOTE: this is copied from common.cpp to avoid linking with libcommon
117+
static std::string fs_get_cache_directory() {
118+
std::string cache_directory = "";
119+
auto ensure_trailing_slash = [](std::string p) {
120+
// Make sure to add trailing slash
121+
if (p.back() != DIRECTORY_SEPARATOR) {
122+
p += DIRECTORY_SEPARATOR;
123+
}
124+
return p;
125+
};
126+
if (getenv("LLAMA_CACHE")) {
127+
cache_directory = std::getenv("LLAMA_CACHE");
128+
} else {
129+
#ifdef __linux__
130+
if (std::getenv("XDG_CACHE_HOME")) {
131+
cache_directory = std::getenv("XDG_CACHE_HOME");
132+
} else {
133+
cache_directory = std::getenv("HOME") + std::string("/.cache/");
134+
}
135+
#elif defined(__APPLE__)
136+
cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
137+
#elif defined(_WIN32)
138+
cache_directory = std::getenv("LOCALAPPDATA");
139+
#endif // __linux__
140+
cache_directory = ensure_trailing_slash(cache_directory);
141+
cache_directory += "llama.cpp";
142+
}
143+
return ensure_trailing_slash(cache_directory);
144+
}
27145

28146
// Settings of the RPC server; the defaults below apply unless overridden
// on the command line.
struct rpc_server_params {
    std::string host{"127.0.0.1"}; // -H, --host : host to bind to
    int         port{50052};       // -p, --port : port to bind to
    size_t      backend_mem{0};    // -m, --mem  : backend memory size
    bool        use_cache{false};  // -c, --cache: enable local file cache
};
33152

34153
// Writes the command-line help text to stderr.
//   argv   - argv[0] is printed as the program name
//   params - supplies the default host and port shown in the help text
static void print_usage(int /*argc*/, char ** argv, rpc_server_params params) {
    FILE * out = stderr;

    fprintf(out, "Usage: %s [options]\n\n", argv[0]);
    fputs("options:\n", out);
    fputs(" -h, --help show this help message and exit\n", out);
    fprintf(out, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str());
    fprintf(out, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port);
    fputs(" -m MEM, --mem MEM backend memory size (in MB)\n", out);
    fputs(" -c, --cache enable local file cache\n", out);
    fputs("\n", out);
}
43163

@@ -58,6 +178,8 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
58178
if (params.port <= 0 || params.port > 65535) {
59179
return false;
60180
}
181+
} else if (arg == "-c" || arg == "--cache") {
182+
params.use_cache = true;
61183
} else if (arg == "-m" || arg == "--mem") {
62184
if (++i >= argc) {
63185
return false;
@@ -164,8 +286,20 @@ int main(int argc, char * argv[]) {
164286
} else {
165287
get_backend_memory(&free_mem, &total_mem);
166288
}
167-
printf("Starting RPC server on %s, backend memory: %zu MB\n", endpoint.c_str(), free_mem / (1024 * 1024));
168-
ggml_backend_rpc_start_server(backend, endpoint.c_str(), free_mem, total_mem);
289+
const char * cache_dir = nullptr;
290+
std::string cache_dir_str = fs_get_cache_directory() + "rpc/";
291+
if (params.use_cache) {
292+
if (!fs_create_directory_with_parents(cache_dir_str)) {
293+
fprintf(stderr, "Failed to create cache directory: %s\n", cache_dir_str.c_str());
294+
return 1;
295+
}
296+
cache_dir = cache_dir_str.c_str();
297+
}
298+
printf("Starting RPC server\n");
299+
printf(" endpoint : %s\n", endpoint.c_str());
300+
printf(" local cache : %s\n", cache_dir ? cache_dir : "n/a");
301+
printf(" backend memory : %zu MB\n", free_mem / (1024 * 1024));
302+
ggml_backend_rpc_start_server(backend, endpoint.c_str(), cache_dir, free_mem, total_mem);
169303
ggml_backend_free(backend);
170304
return 0;
171305
}

ggml/include/ggml-rpc.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const c
1717

1818
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
1919

20-
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem);
20+
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
21+
const char * cache_dir,
22+
size_t free_mem, size_t total_mem);
2123

2224
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
2325

0 commit comments

Comments
 (0)