Skip to content

Commit 5b74740

Browse files
slarenarthw
authored and committed
Revert "llama : add Falcon3 support (ggml-org#10864)" (ggml-org#10876)
This reverts commit 382bc7f.
1 parent 4852a5b commit 5b74740

File tree

3 files changed

+0
-25
lines changed

3 files changed

+0
-25
lines changed

convert_hf_to_gguf.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -525,9 +525,6 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
525525
else:
526526
token: str = reverse_vocab[i]
527527
if token in added_vocab:
528-
# We need to manually encode and decode the added tokens in case special characters
529-
# used for `\n` / `\t` have been manually added in the added tokens
530-
token = tokenizer.decode(tokenizer.encode(token))
531528
if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token):
532529
toktypes.append(gguf.TokenType.CONTROL)
533530
else:
@@ -574,9 +571,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
574571
if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed":
575572
# ref: https://huggingface.co/tiiuae/falcon-7b
576573
res = "falcon"
577-
if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
578-
# ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
579-
res = "falcon3"
580574
if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
581575
# ref: https://huggingface.co/BAAI/bge-small-en-v1.5
582576
res = "bert-bge"

convert_hf_to_gguf_update.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ class TOKENIZER_TYPE(IntEnum):
7272
{"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
7373
{"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
7474
{"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
75-
{"name": "falcon3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon3-7B-Base", },
7675
{"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", },
7776
{"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
7877
{"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },

src/llama.cpp

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1616,7 +1616,6 @@ enum llm_chat_template {
16161616
LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN,
16171617
LLM_CHAT_TEMPLATE_MISTRAL_V7,
16181618
LLM_CHAT_TEMPLATE_PHI_3,
1619-
LLM_CHAT_TEMPLATE_FALCON_3,
16201619
LLM_CHAT_TEMPLATE_ZEPHYR,
16211620
LLM_CHAT_TEMPLATE_MONARCH,
16221621
LLM_CHAT_TEMPLATE_GEMMA,
@@ -1649,7 +1648,6 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
16491648
{ "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
16501649
{ "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 },
16511650
{ "phi3", LLM_CHAT_TEMPLATE_PHI_3 },
1652-
{ "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 },
16531651
{ "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR },
16541652
{ "monarch", LLM_CHAT_TEMPLATE_MONARCH },
16551653
{ "gemma", LLM_CHAT_TEMPLATE_GEMMA },
@@ -6479,11 +6477,6 @@ static void llm_load_vocab(
64796477
} else if (
64806478
tokenizer_pre == "falcon") {
64816479
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON;
6482-
} else if (
6483-
tokenizer_pre == "falcon3") {
6484-
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
6485-
vocab.tokenizer_ignore_merges = true;
6486-
vocab.tokenizer_add_bos = true;
64876480
} else if (
64886481
tokenizer_pre == "mpt") {
64896482
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MPT;
@@ -22235,8 +22228,6 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
2223522228
}
2223622229
} else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
2223722230
return LLM_CHAT_TEMPLATE_PHI_3;
22238-
} else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
22239-
return LLM_CHAT_TEMPLATE_FALCON_3;
2224022231
} else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
2224122232
return LLM_CHAT_TEMPLATE_ZEPHYR;
2224222233
} else if (tmpl_contains("bos_token + message['role']")) {
@@ -22389,15 +22380,6 @@ static int32_t llama_chat_apply_template_internal(
2238922380
if (add_ass) {
2239022381
ss << "<|assistant|>\n";
2239122382
}
22392-
} else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
22393-
// Falcon 3
22394-
for (auto message : chat) {
22395-
std::string role(message->role);
22396-
ss << "<|" << role << "|>\n" << message->content << "\n";
22397-
}
22398-
if (add_ass) {
22399-
ss << "<|assistant|>\n";
22400-
}
2240122383
} else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
2240222384
// zephyr template
2240322385
for (auto message : chat) {

0 commit comments

Comments (0)