server : fix token duplication when streaming with stop strings (ggml-org#10997)

z80maniac · mglambda · commit ca53987010f8 · 2025-03-08T10:19:59.000+01:00
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
@@ -1856,6 +1856,8 @@ struct server_context {
                 result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
                 slot.n_sent_text += result.text_to_send.size();
                 // add the token to slot queue and cache
+            } else {
+                result.text_to_send = "";
             }
 
             slot.add_token(result);

| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | | `@@ -1856,6 +1856,8 @@ struct server_context {` |
| `1856` | `1856` | `result.text_to_send = slot.generated_text.substr(pos, std::string::npos);` |
| `1857` | `1857` | `slot.n_sent_text += result.text_to_send.size();` |
| `1858` | `1858` | `// add the token to slot queue and cache` |
| | `1859` | `+ } else {` |
| | `1860` | `+ result.text_to_send = "";` |
| `1859` | `1861` | `}` |
| `1860` | `1862` | |
| `1861` | `1863` | `slot.add_token(result);` |