Skip to content

Commit 4ffc7a1

Browse files
authored
server : various fixes for the prompt field in /completion (#5300)
* server : fix deadlock when prompt array contains strings and numbers
* server : removed an unnecessary generation when generating multi-prompts
* server : removed an unnecessary assert
1 parent 906cff5 commit 4ffc7a1

File tree

1 file changed

+27
-7
lines changed

1 file changed

+27
-7
lines changed

examples/server/server.cpp

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,13 +1163,30 @@ struct llama_server_context
11631163
task.multitask_id = multitask_id;
11641164

11651165
// when a completion task's prompt array is not a singleton, we split it into multiple requests
1166-
if (task.data.count("prompt") && task.data.at("prompt").size() > 1)
1167-
{
1168-
split_multiprompt_task(task_id, task);
1169-
}
1170-
11711166
// otherwise, it's a single-prompt task, we actually queue it
1172-
queue_tasks.post(task);
1167+
// if there are numbers in the prompt array, it will be treated as an array of tokens
1168+
if (task.data.count("prompt") != 0 && task.data.at("prompt").size() > 1) {
1169+
bool numbers = false;
1170+
for (const auto& e : task.data.at("prompt")) {
1171+
if (e.is_number()) {
1172+
numbers = true;
1173+
break;
1174+
}
1175+
}
1176+
1177+
// NOTE: split_multiprompt_task() does not handle a mix of strings and numbers,
1178+
// it will completely stall the server. I don't know where the bug for this is.
1179+
//
1180+
// if there are numbers, it needs to be treated like a single prompt,
1181+
// queue_tasks handles a mix of strings and numbers just fine.
1182+
if (numbers) {
1183+
queue_tasks.post(task);
1184+
} else {
1185+
split_multiprompt_task(task_id, task);
1186+
}
1187+
} else {
1188+
queue_tasks.post(task);
1189+
}
11731190
}
11741191

11751192
// for multiple images processing
@@ -1251,7 +1268,10 @@ struct llama_server_context
12511268
void split_multiprompt_task(int multitask_id, task_server& multiprompt_task)
12521269
{
12531270
int prompt_count = multiprompt_task.data.at("prompt").size();
1254-
assert(prompt_count > 1);
1271+
if (prompt_count <= 1) {
1272+
send_error(multiprompt_task, "error while handling multiple prompts");
1273+
return;
1274+
}
12551275

12561276
// generate all the ID for subtask
12571277
std::vector<int> subtask_ids(prompt_count);

0 commit comments

Comments
 (0)