Skip to content

Commit 940613f

Browse files
ggerganov authored and arthw committed
server : handle models with missing EOS token (ggml-org#8997)
ggml-ci
1 parent 41040f3 commit 940613f

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

examples/server/server.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,7 @@ struct server_context {
631631

632632
bool clean_kv_cache = true;
633633
bool add_bos_token = true;
634+
bool has_eos_token = false;
634635

635636
int32_t n_ctx; // total context for all clients / slots
636637

@@ -693,7 +694,7 @@ struct server_context {
693694
n_ctx = llama_n_ctx(ctx);
694695

695696
add_bos_token = llama_should_add_bos_token(model);
696-
GGML_ASSERT(llama_add_eos_token(model) != 1);
697+
has_eos_token = llama_add_eos_token(model) != 1;
697698

698699
return true;
699700
}
@@ -1031,7 +1032,7 @@ struct server_context {
10311032
{
10321033
slot.sparams.logit_bias.clear();
10331034

1034-
if (json_value(data, "ignore_eos", false)) {
1035+
if (json_value(data, "ignore_eos", false) && has_eos_token) {
10351036
slot.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
10361037
}
10371038

0 commit comments

Comments (0)