Skip to content

Commit 9f47fa5

Browse files
authored
vocab : warn about missing mask token (#14022)
1 parent 9e31bec commit 9f47fa5

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

src/llama-vocab.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2098,7 +2098,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
2098 2098
|| _contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})
2099 2099
|| _contains_any(general_arch, {"nomic-bert-moe"})
2100 2100
) {
2101-
_set_token_attr("<mask>", LLAMA_TOKEN_ATTR_LSTRIP, true);
2101+
if (token_to_id.count("<mask>") == 0) {
2102+
LLAMA_LOG_WARN("%s: Mask token is missing in vocab, please reconvert model!\n", __func__);
2103+
} else {
2104+
_set_token_attr("<mask>", LLAMA_TOKEN_ATTR_LSTRIP, true);
2105+
}
2102 2106
} else if (_contains_any(model_name, {"phi-3", "phi3"})) {
2103 2107
for (auto id : cache_special_tokens) {
2104 2108
_set_tokenid_attr(id, LLAMA_TOKEN_ATTR_RSTRIP, true);

0 commit comments

Comments
 (0)