Skip to content

Commit cf67172

Browse files
giladgd and xenova authored
[jinja] Add support for rejectattr filter (#988)
I've added support for the `rejectattr` filter in Jinja because I encountered some GGUF models whose chat templates use it, such as [this one](https://huggingface.co/mradermacher/Mistral-Nemo-Instruct-2407-GGUF).

---------

Co-authored-by: Joshua Lochner <[email protected]>
1 parent ff46cba commit cf67172

File tree

3 files changed

+55
-7
lines changed

3 files changed

+55
-7
lines changed

packages/jinja/src/runtime.ts

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -614,12 +614,15 @@ export class Interpreter {
614614

615615
if (operand instanceof ArrayValue) {
616616
switch (filterName) {
617-
case "selectattr": {
617+
case "selectattr":
618+
case "rejectattr": {
619+
const select = filterName === "selectattr";
620+
618621
if (operand.value.some((x) => !(x instanceof ObjectValue))) {
619-
throw new Error("`selectattr` can only be applied to array of objects");
622+
throw new Error(`\`${filterName}\` can only be applied to array of objects`);
620623
}
621624
if (filter.args.some((x) => x.type !== "StringLiteral")) {
622-
throw new Error("arguments of `selectattr` must be strings");
625+
throw new Error(`arguments of \`${filterName}\` must be strings`);
623626
}
624627

625628
const [attr, testName, value] = filter.args.map((x) => this.evaluate(x, environment)) as StringValue[];
@@ -640,10 +643,8 @@ export class Interpreter {
640643
// Filter the array using the test function
641644
const filtered = (operand.value as ObjectValue[]).filter((item) => {
642645
const a = item.value.get(attr.value);
643-
if (a) {
644-
return testFunction(a, value);
645-
}
646-
return false;
646+
const result = a ? testFunction(a, value) : false;
647+
return select ? result : !result;
647648
});
648649

649650
return new ArrayValue(filtered);

packages/jinja/test/e2e.test.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,15 @@ const TEST_CUSTOM_TEMPLATES = Object.freeze({
654654
},
655655
target: `<|begin_of_text|>You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools> {"type": "function", "function": {"name": get_stock_fundamentals", "description": "get_stock_fundamentals(symbol: str) -> dict - Get fundamental data for a given stock symbol using yfinance API.\n\n Args:\n symbol(str): The stock symbol.\n Returns:\n A dictionary containing fundamental data.\n\nKeys:\n - 'symbol': The stock symbol.\n - 'company_name': The long name of the company.\n - 'sector': The sector to which the company belongs.\n - 'industry': The industry to which the company belongs.\n - 'market_cap': The market capitalization of the company.\n - 'pe_ratio': The forward price-to-earnings ratio.\n - 'pb_ratio': The price-to-book ratio.\n - 'dividend_yield': The dividend yield.\n - 'eps': The trailing earnings per share.\n - 'beta': The beta value of the stock.\n - '52_week_high': The 52-week high price of the stock.\n - '52_week_low': The 52-week low price of the stock.", "parameters": {"type": "object", "properties": {"symbol": {"type": "string", "description": "The stock symbol."}}, "required": ["symbol"]}} </tools>Use the following pydantic model json schema for each tool call you will make: {"properties": {"arguments": {"title": "Arguments", "type": "object"}, "name": {"title": "Name", "type": "string"}}, "required": ["arguments", "name"], "title": "FunctionCall", "type": "object"}\nFor each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:\n<tool_call>\n{"arguments": <args-dict>, "name": <function-name>}\n</tool_call><|im_end|><|im_start|>user\nFetch the stock fundamentals data for Tesla (TSLA)<|im_end|>\n<|im_start|>assistant\n`,
656656
},
657+
"mistralai/Mistral-Nemo-Instruct-2407": {
658+
chat_template: `{%- if messages[0]["role"] == "system" %}\n {%- set system_message = messages[0]["content"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}\n\n{%- for message in loop_messages | rejectattr("role", "equalto", "tool") | rejectattr("role", "equalto", "tool_results") | selectattr("tool_calls", "undefined") %}\n {%- if (message["role"] == "user") != (loop.index0 % 2 == 0) %}\n {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }}\n {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message["role"] == "user" %}\n {%- if tools is not none and (message == user_messages[-1]) %}\n {{- "[AVAILABLE_TOOLS][" }}\n {%- for tool in tools %}\n {%- set tool = tool.function %}\n {{- '{"type": "function", "function": {' }}\n {%- for key, val in tool.items() if key != "return" %}\n {%- if val is string %}\n {{- '"' + key + '": "' + val + '"' }}\n {%- else %}\n {{- '"' + key + '": ' + val|tojson }}\n {%- endif %}\n {%- if not loop.last %}\n {{- ", " }}\n {%- endif %}\n {%- endfor %}\n {{- "}}" }}\n {%- if not loop.last %}\n {{- ", " }}\n {%- else %}\n {{- "]" }}\n {%- endif %}\n {%- endfor %}\n {{- "[/AVAILABLE_TOOLS]" }}\n {%- endif %}\n {%- if loop.last and system_message is defined %}\n {{- "[INST]" + system_message + "\\n\\n" + message["content"] + "[/INST]" }}\n {%- else %}\n {{- "[INST]" + message["content"] + "[/INST]" }}\n {%- endif %}\n {%- elif message["role"] == "tool_calls" or message.tool_calls is defined %}\n {%- if message.tool_calls is defined %}\n {%- set tool_calls = message.tool_calls %}\n {%- else %}\n {%- set tool_calls = message.content %}\n {%- endif %}\n {{- "[TOOL_CALLS][" }}\n {%- for tool_call in tool_calls 
%}\n {%- set out = tool_call.function|tojson %}\n {{- out[:-1] }}\n {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}\n {%- endif %}\n {{- ', "id": "' + tool_call.id + '"}' }}\n {%- if not loop.last %}\n {{- ", " }}\n {%- else %}\n {{- "]" + eos_token }}\n {%- endif %}\n {%- endfor %}\n {%- elif message["role"] == "assistant" %}\n {{- message["content"] + eos_token}}\n {%- elif message["role"] == "tool_results" or message["role"] == "tool" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '[TOOL_RESULTS]{"content": ' + content|string + ", " }}\n {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}\n {%- endif %}\n {{- '"call_id": "' + message.tool_call_id + '"}[/TOOL_RESULTS]' }}\n {%- else %}\n {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }}\n {%- endif %}\n{%- endfor %}\n`,
659+
data: {
660+
messages: EXAMPLE_CHAT,
661+
bos_token: "<s>",
662+
eos_token: "</s>"
663+
},
664+
target: `<s>[INST]Hello, how are you?[/INST]I'm doing great. How can I help you today?</s>[INST]I'd like to show off how chat templating works![/INST]`,
665+
},
657666
"meta-llama/Llama-3.1-8B-Instruct": {
658667
chat_template: `{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = "26 Jul 2024" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = "" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- "<|start_header_id|>system<|end_header_id|>\\n\\n" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- "Environment: ipython\\n" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\\n\\n"}}\n{%- endif %}\n{{- "Cutting Knowledge Date: December 2023\\n" }}\n{{- "Today Date: " + date_string + "\\n\\n" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}\n {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}}\n {{- "Do not use variables.\\n\\n" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- "\\n\\n" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- "<|eot_id|>" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- "Given the following functions, please respond with a JSON for a function call " }}\n {{- "with its proper arguments that best answers the given prompt.\\n\\n" }}\n {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}\n {{- "Do not use variables.\\n\\n" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- "\\n\\n" }}\n {%- endfor %}\n {{- first_user_message + "<|eot_id|>"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception("This model only supports single tool-calls at once!") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- "<|python_tag|>" + tool_call.name + ".call(" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '="' + arg_val + '"' }}\n {%- if not loop.last %}\n {{- ", " 
}}\n {%- endif %}\n {%- endfor %}\n {{- ")" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{"name": "' + tool_call.name + '", ' }}\n {{- '"parameters": ' }}\n {{- tool_call.arguments | tojson }}\n {{- "}" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- "<|eom_id|>" }}\n {%- else %}\n {{- "<|eot_id|>" }}\n {%- endif %}\n {%- elif message.role == "tool" or message.role == "ipython" %}\n {{- "<|start_header_id|>ipython<|end_header_id|>\\n\\n" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- "<|eot_id|>" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n`,
659668
data: {

packages/jinja/test/templates.test.js

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ const TEST_STRINGS = {
8787
FILTER_OPERATOR_8: `{{ obj | tojson(indent=2) }}`,
8888
FILTER_OPERATOR_9: `{{ data | map(attribute='val') | list | tojson }}`,
8989
FILTER_OPERATOR_10: `|{{ " 1 \n 2 \n 3 \n\n " | indent }}|{{ " 1 \n 2 \n 3 \n\n " | indent(2) }}|{{ " 1 \n 2 \n 3 \n\n " | indent(first=True) }}|{{ " 1 \n 2 \n 3 \n\n " | indent(blank=True) }}|{{ " 1 \n 2 \n 3 \n\n " | indent(4, first=True) }}|`,
90+
FILTER_OPERATOR_11: `{{ items | rejectattr('key') | length }}`,
91+
FILTER_OPERATOR_12: `{{ messages | rejectattr('role', 'equalto', 'system') | length }}`,
9092

9193
// Logical operators between non-Booleans
9294
BOOLEAN_NUMERICAL: `|{{ 1 and 2 }}|{{ 1 and 0 }}|{{ 0 and 1 }}|{{ 0 and 0 }}|{{ 1 or 2 }}|{{ 1 or 0 }}|{{ 0 or 1 }}|{{ 0 or 0 }}|{{ not 1 }}|{{ not 0 }}|`,
@@ -1625,6 +1627,34 @@ const TEST_PARSED = {
16251627
{ value: "}}", type: "CloseExpression" },
16261628
{ value: "|", type: "Text" },
16271629
],
1630+
FILTER_OPERATOR_11: [
1631+
{ value: "{{", type: "OpenExpression" },
1632+
{ value: "items", type: "Identifier" },
1633+
{ value: "|", type: "Pipe" },
1634+
{ value: "rejectattr", type: "Identifier" },
1635+
{ value: "(", type: "OpenParen" },
1636+
{ value: "key", type: "StringLiteral" },
1637+
{ value: ")", type: "CloseParen" },
1638+
{ value: "|", type: "Pipe" },
1639+
{ value: "length", type: "Identifier" },
1640+
{ value: "}}", type: "CloseExpression" },
1641+
],
1642+
FILTER_OPERATOR_12: [
1643+
{ value: "{{", type: "OpenExpression" },
1644+
{ value: "messages", type: "Identifier" },
1645+
{ value: "|", type: "Pipe" },
1646+
{ value: "rejectattr", type: "Identifier" },
1647+
{ value: "(", type: "OpenParen" },
1648+
{ value: "role", type: "StringLiteral" },
1649+
{ value: ",", type: "Comma" },
1650+
{ value: "equalto", type: "StringLiteral" },
1651+
{ value: ",", type: "Comma" },
1652+
{ value: "system", type: "StringLiteral" },
1653+
{ value: ")", type: "CloseParen" },
1654+
{ value: "|", type: "Pipe" },
1655+
{ value: "length", type: "Identifier" },
1656+
{ value: "}}", type: "CloseExpression" },
1657+
],
16281658

16291659
// Logical operators between non-Booleans
16301660
BOOLEAN_NUMERICAL: [
@@ -2937,6 +2967,12 @@ const TEST_CONTEXT = {
29372967
data: [{ val: 1 }, { val: 2 }, { val: 3 }],
29382968
},
29392969
FILTER_OPERATOR_10: {},
2970+
FILTER_OPERATOR_11: {
2971+
items: [{ key: "a" }, { key: 0 }, { key: 1 }, {}, { key: false }],
2972+
},
2973+
FILTER_OPERATOR_12: {
2974+
messages: [{ role: "system" }, { role: "user" }, { role: "assistant" }],
2975+
},
29402976

29412977
// Logical operators between non-Booleans
29422978
BOOLEAN_NUMERICAL: {},
@@ -3107,6 +3143,8 @@ const EXPECTED_OUTPUTS = {
31073143
FILTER_OPERATOR_8: `{\n "a": [\n 1,\n 2,\n 3\n ],\n "b": 1,\n "c": {\n "d": 2,\n "e": {\n "f": 3,\n "g": {\n "h": 4,\n "i": [\n 1,\n 2,\n 3\n ]\n }\n }\n }\n}`,
31083144
FILTER_OPERATOR_9: `[1, 2, 3]`,
31093145
FILTER_OPERATOR_10: `| 1 \n 2 \n 3 \n\n | 1 \n 2 \n 3 \n\n | 1 \n 2 \n 3 \n\n | 1 \n 2 \n 3 \n \n | 1 \n 2 \n 3 \n\n |`,
3146+
FILTER_OPERATOR_11: `3`,
3147+
FILTER_OPERATOR_12: `2`,
31103148

31113149
// Logical operators between non-Booleans
31123150
BOOLEAN_NUMERICAL: `|2|0|0|0|1|1|1|0|false|true|`,

0 commit comments

Comments
 (0)