Commit 4ef6e82

common : don't crash if newline token is not found
1 parent 895407f commit 4ef6e82

1 file changed: 6 additions, 1 deletion

llama.cpp

Lines changed: 6 additions & 1 deletion
@@ -3309,7 +3309,12 @@ static void llm_load_vocab(

         // determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n'
         if (vocab.type == LLAMA_VOCAB_TYPE_SPM) {
-            vocab.linefeed_id = llama_byte_to_token(vocab, '\n');
+            try {
+                vocab.linefeed_id = llama_byte_to_token(vocab, '\n');
+            } catch (const std::exception & e) {
+                LLAMA_LOG_WARN("%s: SPM vocabulary, but newline token not found: %s! Using special_pad_id instead.", __func__, e.what());
+                vocab.linefeed_id = vocab.special_pad_id;
+            }
         } else if (vocab.type == LLAMA_VOCAB_TYPE_WPM) {
             vocab.linefeed_id = vocab.special_pad_id;
         } else {
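For context, the pattern in this patch can be sketched in isolation: `llama_byte_to_token` may throw when the byte has no corresponding token in the vocabulary, and the caller now catches that and falls back to `special_pad_id` instead of crashing during vocab loading. The snippet below is a minimal, self-contained illustration of that fallback; `mock_vocab` and `mock_byte_to_token` are hypothetical stand-ins, not the actual llama.cpp implementation.

```cpp
#include <cstdio>
#include <map>
#include <stdexcept>

// Hypothetical stand-in for the llama.cpp vocabulary structure.
struct mock_vocab {
    std::map<char, int> byte_tokens;   // byte -> token id
    int special_pad_id = 0;
    int linefeed_id    = -1;
};

// Mirrors the relevant behavior of llama_byte_to_token:
// throws when the byte has no corresponding token.
static int mock_byte_to_token(const mock_vocab & vocab, char ch) {
    auto it = vocab.byte_tokens.find(ch);
    if (it == vocab.byte_tokens.end()) {
        throw std::runtime_error("byte not found in vocab");
    }
    return it->second;
}

int main() {
    mock_vocab vocab; // intentionally has no '\n' entry

    // Same pattern as the patched llm_load_vocab: catch the lookup
    // failure and fall back to the pad token instead of crashing.
    try {
        vocab.linefeed_id = mock_byte_to_token(vocab, '\n');
    } catch (const std::exception & e) {
        std::fprintf(stderr, "newline token not found: %s, using special_pad_id\n", e.what());
        vocab.linefeed_id = vocab.special_pad_id;
    }

    std::printf("linefeed_id = %d\n", vocab.linefeed_id);
    return 0;
}
```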
