Skip to content

Commit 940613f

Browse files
ggerganov authored and arthw committed
server : handle models with missing EOS token (ggml-org#8997)
ggml-ci
1 parent 41040f3 commit 940613f

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

examples/server/server.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,7 @@ struct server_context {
631631

632632
bool clean_kv_cache = true;
633633
bool add_bos_token = true;
634+
bool has_eos_token = false;
634635

635636
int32_t n_ctx; // total context for all clients / slots
636637

@@ -693,7 +694,7 @@ struct server_context {
693694
n_ctx = llama_n_ctx(ctx);
694695

695696
add_bos_token = llama_should_add_bos_token(model);
696-
GGML_ASSERT(llama_add_eos_token(model) != 1);
697+
has_eos_token = llama_add_eos_token(model) != 1;
697698

698699
return true;
699700
}
@@ -1031,7 +1032,7 @@ struct server_context {
10311032
{
10321033
slot.sparams.logit_bias.clear();
10331034

1034-
if (json_value(data, "ignore_eos", false)) {
1035+
if (json_value(data, "ignore_eos", false) && has_eos_token) {
10351036
slot.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
10361037
}
10371038

0 commit comments

Comments (0)