Commit 03fb8a0
If the first token generated by the server is the stop word, the server will crash (#7038)

This reproduces the issue with llama13b: { 'prompt': 'Q: hello world \nA: ', 'stop': ['\n'], 'temperature': 0.0, 'n_predict': 10, 'cache_prompt': True, 'n_probs': 10 }
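
For context, a minimal standalone sketch of the failure mode (this is not the server code; the names mirror the patch below, but the token values are made up for illustration): if the stop word tokenizes to more tokens than have been generated so far, subtracting stop_word_toks.size() from generated_token_probs.end() yields an iterator before begin(), and constructing a vector from that range is undefined behavior. Clamping the offset, as the patch does, avoids it:

// Standalone sketch; only the clamp pattern matches the actual fix.
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
    std::vector<int> generated_token_probs = {42};  // only 1 token generated so far
    const size_t stop_word_len = 2;                 // stop word tokenized to 2 tokens

    // Buggy form: end() - 2 lands before begin() -> undefined behavior, crash.
    // std::vector<int> probs(generated_token_probs.begin(),
    //                        generated_token_probs.end() - stop_word_len);

    // Patched form: clamp the offset so it never exceeds the element count.
    const size_t safe_offset = std::min(generated_token_probs.size(), stop_word_len);
    std::vector<int> probs(generated_token_probs.begin(),
                           generated_token_probs.end() - safe_offset);

    std::printf("kept %zu token prob(s)\n", probs.size());  // prints: kept 0 token prob(s)
    return 0;
}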
1 parent 92139b9 commit 03fb8a0


examples/server/server.cpp

Lines changed: 2 additions & 1 deletion
@@ -1383,9 +1383,10 @@ struct server_context {
             if (!slot.params.stream && slot.stopped_word) {
                 const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);
 
+                size_t safe_offset = std::min(slot.generated_token_probs.size(), stop_word_toks.size());
                 probs = std::vector<completion_token_output>(
                         slot.generated_token_probs.begin(),
-                        slot.generated_token_probs.end() - stop_word_toks.size());
+                        slot.generated_token_probs.end() - safe_offset);
             } else {
                 probs = std::vector<completion_token_output>(
                         slot.generated_token_probs.begin(),
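
With this change, when the stop word tokenizes to more tokens than have been generated so far, std::min clamps the offset to the number of available entries, so the truncated probs vector simply comes out empty instead of being constructed from an iterator before begin(), which is the undefined behavior behind the crash reported above.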
