We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ce8a35c | commit 61de553 | Copy full SHA for 61de553
examples/server/server.cpp
@@ -2308,7 +2308,7 @@ struct server_context {
2308
llama_decode(ctx, slot.batch_spec);
2309
2310
// the accepted tokens from the speculation
2311
- const auto ids = common_sampler_sample_n(slot.smpl, ctx, draft);
+ const auto ids = common_sampler_sample_and_accept_n(slot.smpl, ctx, draft);
2312
2313
slot.n_past += ids.size();
2314
slot.n_decoded += ids.size();
@@ -2323,8 +2323,6 @@ struct server_context {
2323
2324
id = ids[i];
2325
2326
- common_sampler_accept(slot.smpl, id, true);
2327
-
2328
result.tok = id;
2329
2330
if (!process_token(result, slot)) {
0 commit comments