@@ -1180,8 +1180,9 @@ struct llama_server_context
1180
1180
return slot.images .size () > 0 ;
1181
1181
}
1182
1182
1183
- void send_error (task_server& task, std::string error)
1183
+ void send_error (task_server& task, const std::string & error)
1184
1184
{
1185
+ LOG_TEE (" task %i - error: %s\n " , task.id , error.c_str ());
1185
1186
std::unique_lock<std::mutex> lock (mutex_results);
1186
1187
task_result res;
1187
1188
res.id = task.id ;
@@ -1570,12 +1571,22 @@ struct llama_server_context
1570
1571
LOG_TEE (" slot unavailable\n " );
1571
1572
// send error result
1572
1573
send_error (task, " slot unavailable" );
1573
- return ;
1574
+ break ;
1574
1575
}
1575
1576
1576
1577
if (task.data .contains (" system_prompt" ))
1577
1578
{
1579
+ if (!all_slots_are_idle) {
1580
+ send_error (task, " system prompt can only be updated when all slots are idle" );
1581
+ break ;
1582
+ }
1578
1583
process_system_prompt_data (task.data [" system_prompt" ]);
1584
+
1585
+ // reset cache_tokens for all slots
1586
+ for (llama_client_slot &slot : slots)
1587
+ {
1588
+ slot.cache_tokens .clear ();
1589
+ }
1579
1590
}
1580
1591
1581
1592
slot->reset ();
@@ -1652,8 +1663,7 @@ struct llama_server_context
1652
1663
// attend tasks
1653
1664
process_tasks ();
1654
1665
1655
- // update the system prompt wait until all slots are idle state
1656
- if (system_need_update && all_slots_are_idle)
1666
+ if (system_need_update)
1657
1667
{
1658
1668
LOG_TEE (" updating system prompt\n " );
1659
1669
update_system_prompt ();
0 commit comments