Skip to content

Commit 1c154e9

Browse files
committed
lazy fix for llama-bench (runs without pp_threads support)
1 parent a129a31 commit 1c154e9

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

examples/llama-bench/llama-bench.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -853,15 +853,15 @@ static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_bat
853853
int n_processed = 0;
854854
while (n_processed < n_prompt) {
855855
int n_tokens = std::min(n_prompt - n_processed, n_batch);
856-
llama_eval(ctx, tokens.data(), n_tokens, n_past + n_processed, n_threads);
856+
llama_eval(ctx, tokens.data(), n_tokens, n_past + n_processed, n_threads, n_threads);
857857
n_processed += n_tokens;
858858
}
859859
}
860860

861861
static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
862862
llama_token token = llama_token_bos();
863863
for (int i = 0; i < n_gen; i++) {
864-
llama_eval(ctx, &token, 1, n_past + i, n_threads);
864+
llama_eval(ctx, &token, 1, n_past + i, n_threads, n_threads);
865865
}
866866
}
867867

0 commit comments

Comments (0)