Skip to content

Commit c4801a9

Browse files
authored
Merge pull request #1 from ochafik/Telosnex_phi4_tools_template
Fixes for phi-4 support
2 parents 769cd71 + 3f40aec commit c4801a9

File tree

4 files changed

+57
-94
lines changed

4 files changed

+57
-94
lines changed

common/chat.cpp

Lines changed: 10 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -580,10 +580,7 @@ static common_chat_msg parse_json_tool_calls(
580580
}
581581

582582
if (!result.tool_calls.empty()) {
583-
if (!string_strip(result.content).empty()) {
584-
LOG_WRN("Content found with tool calls: %s\n", result.content.c_str());
585-
}
586-
result.content = "";
583+
result.content = string_strip(result.content);
587584
}
588585
return result;
589586
}
@@ -1359,14 +1356,15 @@ static common_chat_params common_chat_params_init_phi_4(const common_chat_templa
13591356
std::string name = function.at("name");
13601357
auto parameters = function.at("parameters");
13611358
builder.resolve_refs(parameters);
1362-
tool_rules.push_back(builder.add_schema(name + "-call", {
1359+
auto call_rule = builder.add_schema(name + "-call", {
13631360
{"type", "object"},
13641361
{"properties", {
13651362
{"name", {{"const", name}}},
13661363
{"arguments", parameters},
13671364
}},
13681365
{"required", json::array({"name", "arguments"})},
1369-
}));
1366+
});
1367+
tool_rules.push_back(builder.add_rule(name + "-call", "\"<|tool_call|>\" " + call_rule + " \"<|/tool_call|>\""));
13701368
});
13711369
auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
13721370
std::vector<std::string> alt_tags {
@@ -1379,6 +1377,9 @@ static common_chat_params common_chat_params_init_phi_4(const common_chat_templa
13791377
data.preserved_tokens = {
13801378
"<|tool_call|>",
13811379
"</|tool_call|>",
1380+
"<|tool_response|>",
1381+
"<|tool|>",
1382+
"</|tool|>",
13821383
};
13831384
});
13841385

@@ -1437,89 +1438,9 @@ static common_chat_params common_chat_params_init_phi_4(const common_chat_templa
14371438
}
14381439

14391440
static common_chat_msg common_chat_parse_phi_4(const std::string & input) {
1440-
common_chat_msg result;
1441-
result.role = "assistant";
1442-
1443-
std::string final_content = "";
1444-
1445-
const std::string opening_tag = "<|tool_call|>";
1446-
const std::string closing_tag = "</|tool_call|>";
1447-
1448-
size_t start_pos = 0;
1449-
while (true) {
1450-
// Find next tool call
1451-
size_t tool_start = input.find(opening_tag, start_pos);
1452-
if (tool_start == std::string::npos) {
1453-
// No more tool calls.
1454-
1455-
// Is start_pos within string bounds?
1456-
if (start_pos < input.length()) {
1457-
// Add the rest of the string to final_content
1458-
final_content += input.substr(start_pos);
1459-
}
1460-
break;
1461-
}
1462-
1463-
// Add content before the tool call to final_content
1464-
final_content += input.substr(start_pos, tool_start - start_pos);
1465-
1466-
// Find closing tag
1467-
size_t content_start = tool_start + opening_tag.length();
1468-
size_t tool_end = input.find(closing_tag, content_start);
1469-
1470-
if (tool_end == std::string::npos) {
1471-
// No closing tag found, so just include the rest of the string as tool.
1472-
tool_end = input.length();
1473-
}
1474-
1475-
// Extract tool call content
1476-
std::string tool_content = input.substr(
1477-
content_start,
1478-
tool_end - content_start
1479-
);
1480-
1481-
// Try to parse the tool call
1482-
try {
1483-
auto tool_call = json::parse(tool_content);
1484-
1485-
// Verify the required fields exist
1486-
if (!tool_call.contains("name")) {
1487-
throw std::runtime_error("Missing 'name' field in tool call");
1488-
}
1489-
1490-
if (!tool_call.contains("arguments")) {
1491-
throw std::runtime_error("Missing 'arguments' field in tool call");
1492-
}
1493-
1494-
std::string name = tool_call["name"].get<std::string>();
1495-
1496-
std::string arguments;
1497-
try {
1498-
arguments = tool_call["arguments"].dump();
1499-
} catch (const std::exception & e) {
1500-
LOG_ERR("Failed to serialize arguments: %s\n", e.what());
1501-
arguments = "{}";
1502-
}
1503-
1504-
result.tool_calls.push_back({
1505-
name,
1506-
arguments,
1507-
/* id= */ "",
1508-
});
1509-
} catch (const std::exception & e) {
1510-
// If parsing fails, include the entire tool call in the content
1511-
final_content += input.substr(
1512-
tool_start,
1513-
tool_end + closing_tag.length() - tool_start
1514-
);
1515-
}
1516-
1517-
// Move past this tool call for next iteration
1518-
start_pos = tool_end + closing_tag.length();
1519-
}
1520-
1521-
result.content = final_content;
1522-
return result;
1441+
static std::regex function_regex("<\\|tool_call\\|>\\s*\\{\\s*\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"arguments\"\\s*:");
1442+
static std::regex close_regex(R"(\}\s*(</\|tool_call\|>)?)");
1443+
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
15231444
}
15241445

15251446

docs/function-calling.md

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@ Function calling is supported for all models (see https://github.com/ggml-org/ll
1212
- Llama 3.1 / 3.3 (including builtin tools support - tool names for `wolfram_alpha`, `web_search` / `brave_search`, `code_interpreter`), Llama 3.2
1313
- Functionary v3.1 / v3.2
1414
- Hermes 2/3, Qwen 2.5
15-
- Qwen 2.5 Coder (WIP: https://github.com/ggml-org/llama.cpp/pull/12034)
15+
- Qwen 2.5 Coder (#12034)
1616
- Mistral Nemo
1717
- Firefunction v2
18-
- Command R7B
19-
- DeepSeek R1 (WIP / seems reluctant to call any tools?)
18+
- Command R7B (#11585)
19+
- DeepSeek R1 (#11607)
20+
- Phi 4 (#12288)
2021

2122
- Generic tool call is supported when the template isn't recognized by native format handlers (you'll see `Chat format: Generic` in the logs).
2223
- Use `--chat-template-file` to override the template when appropriate (see examples below)
@@ -297,9 +298,14 @@ llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L \
297298
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M \
298299
--chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
299300

301+
# Native support for Phi 4 also needs a template override (official template is buggy)
302+
303+
llama-server --jinja -fa -hf bartowski/microsoft_Phi-4-mini-instruct-GGUF \
304+
--chat-template-file models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja
305+
300306
# Native support requires the right template for these GGUFs:
301307

302-
llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
308+
llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M \
303309
--chat-template-file models/templates/meetkai-functionary-medium-v3.2.jinja
304310

305311
llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \

examples/server/server.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,8 @@ struct server_task {
384384
SRV_DBG("Grammar trigger token: %d (`%s`)\n", token, word.c_str());
385385
common_grammar_trigger trigger;
386386
trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
387-
trigger.value = (llama_token) token;
387+
trigger.value = word;
388+
trigger.token = token;
388389
params.sampling.grammar_triggers.push_back(trigger);
389390
} else {
390391
SRV_DBG("Grammar trigger word: `%s`\n", word.c_str());
models/templates/llama-cpp-microsoft-Phi-4-mini-instruct.jinja

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{%- if messages[0]["role"] == "system" %}
2+
{%- set system_message = messages[0]["content"] %}
3+
{% elif tools is defined -%}
4+
{%- set system_message = "You are a helpful assistant with access to tools." -%}
5+
{% else %}
6+
{%- set system_message = "" -%}
7+
{%- endif %}
8+
{%- if tools is defined -%}
9+
{%- set system_message = system_message + '<|tool|>' + (tools | tojson) + '<|/tool|>' -%}
10+
{%- if '<|tool_call|>' not in system_message -%}
11+
{%- set system_message = system_message + "\nTo use a tool, respond in this format: <|tool_call|>{\"name\": \"foo\", \"arguments\": {\"a\": 1}}<|/tool_call|>" %}
12+
{%- endif %}
13+
{%- endif %}
14+
{%- if system_message is defined -%}
15+
{{- '<|system|>' + system_message + '<|end|>' -}}
16+
{%- endif -%}
17+
{%- for message in messages -%}
18+
{%- if message['role'] == 'tool' -%}
19+
{{- '<|tool_response|>' + (message['content'] | tojson) + '<|/tool_response|>' -}}
20+
{%- elif message['role'] != 'system' -%}
21+
{{- '<|' + message['role'] + '|>' -}}
22+
{%- if message.content -%}
23+
{{- message['content'] -}}
24+
{%- endif -%}
25+
{%- for tool_call in message.tool_calls -%}
26+
{{- '<|tool_call|>' + (tool_call | tojson) + '<|/tool_call|>' -}}
27+
{%- endfor -%}
28+
{{- '<|end|>' -}}
29+
{%- endif -%}
30+
{%- endfor -%}
31+
{%- if add_generation_prompt -%}
32+
{{- '<|assistant|>' -}}
33+
{%- else -%}
34+
{{- eos_token -}}
35+
{%- endif -%}

0 commit comments

Comments
 (0)