Skip to content

Commit bc5ba00

Browse files
authored
server : check that the prompt fits in the slot's context (#10030)
ggml-ci
1 parent 958367b commit bc5ba00

File tree

3 files changed

+10
-1
lines changed

3 files changed

+10
-1
lines changed

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
573573
if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
574574
# ref: https://huggingface.co/BAAI/bge-small-en-v1.5
575575
res = "bert-bge"
576+
if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7":
577+
# ref: https://huggingface.co/BAAI/bge-large-zh-v1.5
578+
res = "bert-bge-large"
576579
if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
577580
# ref: https://huggingface.co/mosaicml/mpt-7b
578581
res = "mpt"

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ class TOKENIZER_TYPE(IntEnum):
7272
{"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
7373
{"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
7474
{"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
75+
{"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", },
7576
{"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
7677
{"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
7778
{"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },

examples/server/server.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1882,12 +1882,17 @@ struct server_context {
18821882
}
18831883

18841884
if (slot.inf_type == SERVER_TASK_INF_TYPE_EMBEDDING || slot.inf_type == SERVER_TASK_INF_TYPE_RERANK) {
1885-
// this prompt is too large to process - discard it
18861885
if (slot.n_prompt_tokens > n_ubatch) {
18871886
slot.release();
18881887
send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER);
18891888
continue;
18901889
}
1890+
1891+
if (slot.n_prompt_tokens > slot.n_ctx) {
1892+
slot.release();
1893+
send_error(slot, "input is larger than the max context size. skipping", ERROR_TYPE_SERVER);
1894+
continue;
1895+
}
18911896
} else {
18921897
if (!params.ctx_shift) {
18931898
// if context shift is disabled, we make sure prompt size is smaller than KV size

0 commit comments

Comments (0)