@@ -6509,21 +6509,21 @@ static void llm_load_vocab(
         // for now, we apply this workaround to find the EOT token based on its text
         if (vocab.special_eot_id == -1) {
             for (const auto & t : vocab.token_to_id) {
-                if (
+                if (false
                         // TODO: gemma "<end_of_turn>" is exported as a normal token, so the following check does not work
                         //       need to fix convert script
                         //vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL &&
-                        ( t.first == "<|eot_id|>" ||
-                          t.first == "<|im_end|>" ||
-                          t.first == "<|end|>" ||
-                          t.first == "<end_of_turn>" ||
-                          t.first == "<|endoftext|>"
-                        )
+                        || t.first == "<|eot_id|>"
+                        || t.first == "<|im_end|>"
+                        || t.first == "<|end|>"
+                        || t.first == "<end_of_turn>"
+                        || t.first == "<|endoftext|>"
+                        || t.first == "<EOT>"
                    ) {
                     vocab.special_eot_id = t.second;
                     if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                         LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
-                            __func__, t.first.c_str());
+                                __func__, t.first.c_str());
                         vocab.id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
                     }
                     break;
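
Note on the restyled condition above: seeding the chain with a literal 'false' is a common C/C++ trick that lets every real operand carry its own leading '||', so candidate tokens can be added, removed, or commented out one line at a time. A minimal standalone sketch of the idiom (names here are illustrative, not from the patch):

#include <string>

// Returns true if `text` looks like an end-of-turn marker.
// The leading `false` is a no-op seed: each real case starts with `||`,
// so any single line can be deleted or commented out without editing
// its neighbors.
static bool looks_like_eot(const std::string & text) {
    return false
        || text == "<|eot_id|>"
        || text == "<|im_end|>"
     // || text == "<custom_stop>"   // toggling a candidate is a one-line change
        || text == "<EOT>";
}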
@@ -6546,6 +6546,44 @@ static void llm_load_vocab(
                 }
             }
         }
+
+        // maintain a list of tokens that cause end-of-generation
+        // this is currently determined based on the token text, which is obviously not ideal
+        // ref: https://github.com/ggerganov/llama.cpp/issues/9606
+        vocab.special_eog_ids.clear();
+        for (const auto & t : vocab.token_to_id) {
+            if (false
+                    || t.first == "<|eot_id|>"
+                    || t.first == "<|im_end|>"
+                    || t.first == "<|end|>"
+                    || t.first == "<end_of_turn>"
+                    || t.first == "<|endoftext|>"
+                    || t.first == "<|eom_id|>"
+                    || t.first == "<EOT>"
+               ) {
+                vocab.special_eog_ids.insert(t.second);
+                if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
+                            __func__, t.first.c_str());
+                    vocab.id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
+                }
+            }
+        }
+
+        if (vocab.special_eos_id != -1 && vocab.special_eog_ids.count(vocab.special_eos_id) == 0) {
+            vocab.special_eog_ids.insert(vocab.special_eos_id);
+            LLAMA_LOG_WARN("%s: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect\n", __func__);
+        }
+
+        if (vocab.special_eot_id != -1 && vocab.special_eog_ids.count(vocab.special_eot_id) == 0) {
+            vocab.special_eog_ids.insert(vocab.special_eot_id);
+            LLAMA_LOG_WARN("%s: special_eot_id is not in special_eog_ids - the tokenizer config may be incorrect\n", __func__);
+        }
+
+        if (vocab.special_eom_id != -1 && vocab.special_eog_ids.count(vocab.special_eom_id) == 0) {
+            vocab.special_eog_ids.insert(vocab.special_eom_id);
+            LLAMA_LOG_WARN("%s: special_eom_id is not in special_eog_ids - the tokenizer config may be incorrect\n", __func__);
+        }
     }

     // build special tokens cache
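
The new special_eog_ids set gives the rest of the code a single membership test for "should generation stop on this token", instead of comparing against the eos/eot/eom ids separately. The call sites are outside this diff; a minimal sketch of how a consumer could use the set, assuming the field is a std::set<llama_token> (hypothetical helper, not part of the patch):

#include <cstdint>
#include <set>

using llama_token = int32_t;

struct vocab_sketch {
    std::set<llama_token> special_eog_ids; // filled during vocab load, as above
};

// Hypothetical helper: true if sampling should stop on this token.
// Membership in the set covers EOS, EOT, EOM and the text-matched
// tokens collected in llm_load_vocab.
static bool token_is_eog(const vocab_sketch & vocab, llama_token token) {
    return token != -1 && vocab.special_eog_ids.count(token) > 0;
}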
@@ -6749,6 +6787,11 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
     if (vocab.special_suffix_id != -1) { LLAMA_LOG_INFO( "%s: SUF token = %d '%s'\n", __func__, vocab.special_suffix_id, vocab.id_to_token[vocab.special_suffix_id].text.c_str() ); }
     if (vocab.special_middle_id != -1) { LLAMA_LOG_INFO( "%s: MID token = %d '%s'\n", __func__, vocab.special_middle_id, vocab.id_to_token[vocab.special_middle_id].text.c_str() ); }
     if (vocab.special_eot_id != -1) { LLAMA_LOG_INFO( "%s: EOT token = %d '%s'\n", __func__, vocab.special_eot_id, vocab.id_to_token[vocab.special_eot_id].text.c_str() ); }
+    if (vocab.special_eom_id != -1) { LLAMA_LOG_INFO( "%s: EOM token = %d '%s'\n", __func__, vocab.special_eom_id, vocab.id_to_token[vocab.special_eom_id].text.c_str() ); }
+
+    for (const auto & id : vocab.special_eog_ids) {
+        LLAMA_LOG_INFO( "%s: EOG token = %d '%s'\n", __func__, id, vocab.id_to_token[id].text.c_str() );
+    }

     LLAMA_LOG_INFO("%s: max token length = %d\n", __func__, vocab.max_token_len);
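
For a vocabulary in the Llama 3 family, the extra print statements would emit one EOG line per entry in the set (std::set iterates in ascending id order), alongside the existing special-token lines. A rough illustration of the resulting log, with token ids assumed for the example rather than taken from this diff:

llm_load_print_meta: EOM token = 128008 '<|eom_id|>'
llm_load_print_meta: EOG token = 128001 '<|end_of_text|>'
llm_load_print_meta: EOG token = 128008 '<|eom_id|>'
llm_load_print_meta: EOG token = 128009 '<|eot_id|>'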