Skip to content

Commit fffcce5

Browse files
authored
llama-bench : add --no-warmup flag (#14224) (#14270)
Add no_warmup parameter to cmd_params struct and command-line parsing to allow users to skip warmup runs before benchmarking.

- Add no_warmup boolean field to cmd_params struct
- Add --no-warmup command-line argument parsing
- Add help text documentation for the new flag
- Wrap existing warmup logic in conditional check
- Maintain full backward compatibility (warmup enabled by default)

Addresses #14224
1 parent 5fc7856 commit fffcce5

File tree

1 file changed

+26
-18
lines changed

1 file changed

+26
-18
lines changed

tools/llama-bench/llama-bench.cpp

Lines changed: 26 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -267,6 +267,7 @@ struct cmd_params {
267267
int delay;
268268
bool verbose;
269269
bool progress;
270+
bool no_warmup;
270271
output_formats output_format;
271272
output_formats output_format_stderr;
272273
};
@@ -303,6 +304,7 @@ static const cmd_params cmd_params_defaults = {
303304
/* delay */ 0,
304305
/* verbose */ false,
305306
/* progress */ false,
307+
/* no_warmup */ false,
306308
/* output_format */ MARKDOWN,
307309
/* output_format_stderr */ NONE,
308310
};
@@ -325,6 +327,7 @@ static void print_usage(int /* argc */, char ** argv) {
325327
output_format_str(cmd_params_defaults.output_format_stderr));
326328
printf(" -v, --verbose verbose output\n");
327329
printf(" --progress print test progress indicators\n");
330+
printf(" --no-warmup skip warmup runs before benchmarking\n");
328331
printf("\n");
329332
printf("test parameters:\n");
330333
printf(" -m, --model <filename> (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
@@ -425,6 +428,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
425428
params.prio = cmd_params_defaults.prio;
426429
params.delay = cmd_params_defaults.delay;
427430
params.progress = cmd_params_defaults.progress;
431+
params.no_warmup = cmd_params_defaults.no_warmup;
428432

429433
for (int i = 1; i < argc; i++) {
430434
arg = argv[i];
@@ -798,6 +802,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
798802
params.verbose = true;
799803
} else if (arg == "--progress") {
800804
params.progress = true;
805+
} else if (arg == "--no-warmup") {
806+
params.no_warmup = true;
801807
} else {
802808
invalid_param = true;
803809
break;
@@ -1925,25 +1931,27 @@ int main(int argc, char ** argv) {
19251931
llama_attach_threadpool(ctx, threadpool, NULL);
19261932

19271933
// warmup run
1928-
if (t.n_prompt > 0) {
1929-
if (params.progress) {
1930-
fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup prompt run\n", params_idx, params_count);
1931-
}
1932-
//test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
1933-
bool res = test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
1934-
if (!res) {
1935-
fprintf(stderr, "%s: error: failed to run prompt warmup\n", __func__);
1936-
exit(1);
1937-
}
1938-
}
1939-
if (t.n_gen > 0) {
1940-
if (params.progress) {
1941-
fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup generation run\n", params_idx, params_count);
1934+
if (!params.no_warmup) {
1935+
if (t.n_prompt > 0) {
1936+
if (params.progress) {
1937+
fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup prompt run\n", params_idx, params_count);
1938+
}
1939+
//test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
1940+
bool res = test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
1941+
if (!res) {
1942+
fprintf(stderr, "%s: error: failed to run prompt warmup\n", __func__);
1943+
exit(1);
1944+
}
19421945
}
1943-
bool res = test_gen(ctx, 1, t.n_threads);
1944-
if (!res) {
1945-
fprintf(stderr, "%s: error: failed to run gen warmup\n", __func__);
1946-
exit(1);
1946+
if (t.n_gen > 0) {
1947+
if (params.progress) {
1948+
fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup generation run\n", params_idx, params_count);
1949+
}
1950+
bool res = test_gen(ctx, 1, t.n_threads);
1951+
if (!res) {
1952+
fprintf(stderr, "%s: error: failed to run gen warmup\n", __func__);
1953+
exit(1);
1954+
}
19471955
}
19481956
}
19491957

0 commit comments

Comments
 (0)