Skip to content

Commit 1df62f0

Browse files
committed
context : fix graph reserve for multiple sequences
ggml-ci
1 parent 1250e28 commit 1df62f0

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

src/llama-context.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1219,6 +1219,13 @@ ggml_cgraph * llama_context::graph_init() {
1219 | 1219
ggml_cgraph * llama_context::graph_reserve(uint32_t n_tokens, uint32_t n_seqs, uint32_t n_outputs) {
1220 | 1220
LLAMA_LOG_DEBUG("%s: reserving a graph for ubatch with n_tokens = %4u, n_seqs = %2u, n_outputs = %4u\n", __func__, n_tokens, n_seqs, n_outputs);
1221 | 1221

1222+
if (n_tokens % n_seqs != 0) {
1223+
n_tokens = (n_tokens / n_seqs) * n_seqs;
1224+
n_outputs = std::min(n_outputs, n_tokens);
1225+
1226+
LLAMA_LOG_DEBUG("%s: making n_tokens a multiple of n_seqs - n_tokens = %u, n_seqs = %u, n_outputs = %u\n", __func__, n_tokens, n_seqs, n_outputs);
1227+
}
1228+
1222 | 1229
// store the n_outputs as it is, and restore it afterwards
1223 | 1230
// TODO: not sure if needed, might simplify in the future by removing this
1224 | 1231
const auto save_n_outputs = this->n_outputs;

0 commit comments

Comments
 (0)