Skip to content

Commit 1a43c72

Browse files
authored
server : add option to disable KV offload (#6468)
1 parent 72d73af commit 1a43c72

File tree

1 file changed

+4 -0 lines changed

1 file changed

+4 -0 lines changed

examples/server/server.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2213,6 +2213,8 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co
2213 2213
printf(" fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1\n");
2214 2214
printf(" -mg i, --main-gpu i the GPU to use for the model (with split-mode = none),\n");
2215 2215
printf(" or for intermediate results and KV (with split-mode = row)\n");
2216 +
printf(" -nkvo, --no-kv-offload\n");
2217 +
printf(" disable KV offload\n");
2216 2218
}
2217 2219
printf(" -m FNAME, --model FNAME\n");
2218 2220
printf(" model path (default: %s)\n", params.model.c_str());
@@ -2498,6 +2500,8 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
2498 2500
"See main README.md for information on enabling GPU BLAS support",
2499 2501
{{"n_gpu_layers", params.n_gpu_layers}});
2500 2502
}
2503 +
} else if (arg == "-nkvo" || arg == "--no-kv-offload") {
2504 +
params.no_kv_offload = true;
2501 2505
} else if (arg == "--split-mode" || arg == "-sm") {
2502 2506
if (++i >= argc) {
2503 2507
invalid_param = true;

0 commit comments

Comments
 (0)