@@ -267,6 +267,7 @@ struct cmd_params {
     int delay;
     bool verbose;
     bool progress;
+    bool no_warmup;
     output_formats output_format;
     output_formats output_format_stderr;
 };
@@ -303,6 +304,7 @@ static const cmd_params cmd_params_defaults = {
     /* delay                */ 0,
     /* verbose              */ false,
     /* progress             */ false,
+    /* no_warmup            */ false,
     /* output_format        */ MARKDOWN,
     /* output_format_stderr */ NONE,
 };
@@ -325,6 +327,7 @@ static void print_usage(int /* argc */, char ** argv) {
            output_format_str(cmd_params_defaults.output_format_stderr));
     printf("  -v, --verbose                 verbose output\n");
     printf("  --progress                    print test progress indicators\n");
+    printf("  --no-warmup                   skip warmup runs before benchmarking\n");
     printf("\n");
     printf("test parameters:\n");
     printf("  -m, --model <filename>        (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
@@ -425,6 +428,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
     params.prio = cmd_params_defaults.prio;
     params.delay = cmd_params_defaults.delay;
     params.progress = cmd_params_defaults.progress;
+    params.no_warmup = cmd_params_defaults.no_warmup;
 
     for (int i = 1; i < argc; i++) {
         arg = argv[i];
@@ -798,6 +802,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
             params.verbose = true;
         } else if (arg == "--progress") {
            params.progress = true;
+        } else if (arg == "--no-warmup") {
+            params.no_warmup = true;
         } else {
             invalid_param = true;
             break;
@@ -1925,25 +1931,27 @@ int main(int argc, char ** argv) {
         llama_attach_threadpool(ctx, threadpool, NULL);
 
         // warmup run
-        if (t.n_prompt > 0) {
-            if (params.progress) {
-                fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup prompt run\n", params_idx, params_count);
-            }
-            // test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
-            bool res = test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
-            if (!res) {
-                fprintf(stderr, "%s: error: failed to run prompt warmup\n", __func__);
-                exit(1);
-            }
-        }
-        if (t.n_gen > 0) {
-            if (params.progress) {
-                fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup generation run\n", params_idx, params_count);
+        if (!params.no_warmup) {
+            if (t.n_prompt > 0) {
+                if (params.progress) {
+                    fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup prompt run\n", params_idx, params_count);
+                }
+                // test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
+                bool res = test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
+                if (!res) {
+                    fprintf(stderr, "%s: error: failed to run prompt warmup\n", __func__);
+                    exit(1);
+                }
             }
-            bool res = test_gen(ctx, 1, t.n_threads);
-            if (!res) {
-                fprintf(stderr, "%s: error: failed to run gen warmup\n", __func__);
-                exit(1);
+            if (t.n_gen > 0) {
+                if (params.progress) {
+                    fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup generation run\n", params_idx, params_count);
+                }
+                bool res = test_gen(ctx, 1, t.n_threads);
+                if (!res) {
+                    fprintf(stderr, "%s: error: failed to run gen warmup\n", __func__);
+                    exit(1);
+                }
             }
         }
 
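Usage note: the default behavior is unchanged, since `no_warmup` defaults to `false`; warmup is skipped only when the new flag is passed. A minimal usage sketch, assuming the tool is built as `llama-bench` and using a hypothetical model path:

    ./llama-bench -m model.gguf --no-warmup

With `--no-warmup`, both the warmup prompt run and the warmup generation run that normally precede each benchmark instance are skipped.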