We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent cc98896, commit a5a915b (Copy full SHA for a5a915b)
examples/server/server.cpp
@@ -2325,7 +2325,7 @@ struct server_context {
2325
llama_token id = slot.sampled;
2326
2327
struct common_speculative_params params_spec;
2328
- params_spec.n_draft = slot.params.speculative.n_max;
+ params_spec.n_draft = std::min(slot.params.speculative.n_max, slot.n_ctx - slot.n_past - 1);
2329
params_spec.n_reuse = llama_n_ctx(slot.ctx_dft) - slot.params.speculative.n_max;
2330
params_spec.p_min = slot.params.speculative.p_min;
2331
0 commit comments