Skip to content

Commit 61de553

Browse files
committed
server : fix sampling call
ggml-ci
1 parent ce8a35c commit 61de553

File tree

1 file changed

+1
-3
lines changed

1 file changed

+1
-3
lines changed

examples/server/server.cpp

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2308,7 +2308,7 @@ struct server_context {
2308 2308
llama_decode(ctx, slot.batch_spec);
2309 2309

2310 2310
// the accepted tokens from the speculation
2311-
const auto ids = common_sampler_sample_n(slot.smpl, ctx, draft);
2311+
const auto ids = common_sampler_sample_and_accept_n(slot.smpl, ctx, draft);
2312 2312

2313 2313
slot.n_past += ids.size();
2314 2314
slot.n_decoded += ids.size();
@@ -2323,8 +2323,6 @@ struct server_context {
2323 2323

2324 2324
id = ids[i];
2325 2325

2326-
common_sampler_accept(slot.smpl, id, true);
2327-
2328 2326
result.tok = id;
2329 2327

2330 2328
if (!process_token(result, slot)) {

0 commit comments

Comments
 (0)