Make whisperfile go 2x faster on a 96-core CPU

jart · jart · commit 3b653c7d7603 · 2024-08-01T02:47:22.000-07:00
When decoding a small batch, the work is mostly matrix-vector (matvec)
ops. In these cases, using more cores hurts more than it helps, so the
thread count is now capped at 20 when the batch has fewer than 16 tokens.
diff --git a/whisper.cpp/whisper.cpp b/whisper.cpp/whisper.cpp
@@ -2774,7 +2774,7 @@ static bool whisper_decode_internal(
         whisper_context & wctx,
           whisper_state & wstate,
     const whisper_batch & batch,
-              const int   n_threads,
+                    int   n_threads,
                    bool   save_alignment_heads_QKs,
     ggml_abort_callback   abort_callback,
                    void * abort_callback_data) {
@@ -2866,6 +2866,11 @@ static bool whisper_decode_internal(
 
         logits = gf->nodes[gf->n_nodes - 1];
 
+        if (batch.n_tokens < 16) {
+            if (n_threads > 20)
+                n_threads = 20;
+        }
+
         if (!ggml_graph_compute_helper(sched, gf, n_threads)) {
             return false;
         }