Skip to content

Commit d9a9b09

Browse files
authored
llama.cpp : fix LF token
1 parent 6459cab commit d9a9b09

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

llama.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1636,6 +1636,7 @@ static void llm_load_hparams(
 
 // TODO: This should probably be in llama.h
 static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab & vocab, std::string raw_text, bool bos);
+static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch);
 
 static void llm_load_vocab(
         llama_model_loader & ml,
@@ -1737,7 +1738,11 @@ static void llm_load_vocab(
     }
 
     // determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n'
-    vocab.linefeed_id = llama_tokenize_internal(vocab, "\n", false)[0];
+    if (vocab.type == LLAMA_VOCAB_TYPE_SPM) {
+        vocab.linefeed_id = llama_byte_to_token(vocab, '\n');
+    } else {
+        vocab.linefeed_id = llama_tokenize_internal(vocab, "\n", false)[0];
+    }
 
     // special tokens
     GGUF_GET_KEY(ctx, vocab.special_bos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_TOKENIZER_BOS_ID));

0 commit comments

Comments
 (0)