Skip to content

Commit 15b67a6

Browse files
authored
llama-bench : use two tokens in the warmup run for prompt evals (#3059)
1 parent be8c9c2 commit 15b67a6

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

examples/llama-bench/llama-bench.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -986,7 +986,12 @@ int main(int argc, char ** argv) {
             test t(inst, lmodel, ctx);

             // warmup run
-            test_gen(ctx, 1, 0, t.n_threads);
+            if (t.n_prompt > 0) {
+                test_prompt(ctx, std::min(2, t.n_batch), 0, t.n_batch, t.n_threads);
+            }
+            if (t.n_gen > 0) {
+                test_gen(ctx, 1, 0, t.n_threads);
+            }

             for (int i = 0; i < params.reps; i++) {
                 uint64_t t_start = get_time_ns();

0 commit comments

Comments
 (0)