Skip to content

Commit c35bd61

Browse files
giladgd and xenova authored
Jinja - add missing features for DeepSeek R1 (#1142)
This PR adds support for the following Jinja features:

* Accessing members of call expression results
* `.split` on strings - the implementation matches [Python's `str.split(sep=None, maxsplit=-1)` function behavior](https://docs.python.org/3.13/library/stdtypes.html#str.split)

[DeepSeek R1's chat template](https://huggingface.co/deepseek-ai/DeepSeek-R1/blob/main/tokenizer_config.json) used to fail on this expression, which now works in this PR:

```
{%- set content = content.split('</think>')[-1] -%}
```

Fixes #1141

---------

Co-authored-by: Joshua Lochner <[email protected]>
1 parent 12a25f3 commit c35bd61

File tree

5 files changed

+329
-41
lines changed

5 files changed

+329
-41
lines changed

packages/jinja/src/index.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ export class Template {
3131
this.parsed = parse(tokens);
3232
}
3333

34-
render(items: Record<string, unknown>): string {
34+
render(items?: Record<string, unknown>): string {
3535
// Create a new environment for this template
3636
const env = new Environment();
3737

@@ -44,8 +44,10 @@ export class Template {
4444
env.set("range", range);
4545

4646
// Add user-defined variables
47-
for (const [key, value] of Object.entries(items)) {
48-
env.set(key, value);
47+
if (items) {
48+
for (const [key, value] of Object.entries(items)) {
49+
env.set(key, value);
50+
}
4951
}
5052

5153
const interpreter = new Interpreter(env);

packages/jinja/src/parser.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ export function parse(tokens: Token[]): Program {
343343
function parseCallMemberExpression(): Statement {
344344
// Handle member expressions recursively
345345

346-
const member = parseMemberExpression(); // foo.x
346+
const member = parseMemberExpression(parsePrimaryExpression()); // foo.x
347347

348348
if (is(TOKEN_TYPES.OpenParen)) {
349349
// foo.x()
@@ -352,15 +352,17 @@ export function parse(tokens: Token[]): Program {
352352
return member;
353353
}
354354

355-
function parseCallExpression(callee: Statement): CallExpression {
356-
let callExpression = new CallExpression(callee, parseArgs());
355+
function parseCallExpression(callee: Statement): Statement {
356+
let expression: Statement = new CallExpression(callee, parseArgs());
357+
358+
expression = parseMemberExpression(expression); // foo.x().y
357359

358360
if (is(TOKEN_TYPES.OpenParen)) {
359361
// foo.x()()
360-
callExpression = parseCallExpression(callExpression);
362+
expression = parseCallExpression(expression);
361363
}
362364

363-
return callExpression;
365+
return expression;
364366
}
365367

366368
function parseArgs(): Statement[] {
@@ -433,9 +435,7 @@ export function parse(tokens: Token[]): Program {
433435
return slices[0] as Statement; // normal member expression
434436
}
435437

436-
function parseMemberExpression(): Statement {
437-
let object = parsePrimaryExpression();
438-
438+
function parseMemberExpression(object: Statement): Statement {
439439
while (is(TOKEN_TYPES.Dot) || is(TOKEN_TYPES.OpenSquareBracket)) {
440440
const operator = tokens[current]; // . or [
441441
++current;

packages/jinja/src/runtime.ts

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,48 @@ export class StringValue extends RuntimeValue<string> {
117117
return new StringValue(this.value.trimStart());
118118
}),
119119
],
120+
[
121+
"split",
122+
// follows Python's `str.split(sep=None, maxsplit=-1)` function behavior
123+
// https://docs.python.org/3.13/library/stdtypes.html#str.split
124+
new FunctionValue((args) => {
125+
const sep = args[0] ?? new NullValue();
126+
if (!(sep instanceof StringValue || sep instanceof NullValue)) {
127+
throw new Error("sep argument must be a string or null");
128+
}
129+
const maxsplit = args[1] ?? new NumericValue(-1);
130+
if (!(maxsplit instanceof NumericValue)) {
131+
throw new Error("maxsplit argument must be a number");
132+
}
133+
134+
let result = [];
135+
if (sep instanceof NullValue) {
136+
// If sep is not specified or is None, runs of consecutive whitespace are regarded as a single separator, and the
137+
// result will contain no empty strings at the start or end if the string has leading or trailing whitespace.
138+
// Trailing whitespace may be present when maxsplit is specified and there aren't sufficient matches in the string.
139+
const text = this.value.trimStart();
140+
for (const { 0: match, index } of text.matchAll(/\S+/g)) {
141+
if (maxsplit.value !== -1 && result.length >= maxsplit.value && index !== undefined) {
142+
result.push(match + text.slice(index + match.length));
143+
break;
144+
}
145+
result.push(match);
146+
}
147+
} else {
148+
// If sep is specified, consecutive delimiters are not grouped together and are deemed to delimit empty strings.
149+
if (sep.value === "") {
150+
throw new Error("empty separator");
151+
}
152+
result = this.value.split(sep.value);
153+
if (maxsplit.value !== -1 && result.length > maxsplit.value) {
154+
// Follow Python's behavior: If maxsplit is given, at most maxsplit splits are done,
155+
// with any remaining text returned as the final element of the list.
156+
result.push(result.splice(maxsplit.value).join(sep.value));
157+
}
158+
}
159+
return new ArrayValue(result.map((part) => new StringValue(part)));
160+
}),
161+
],
120162
]);
121163
}
122164

@@ -543,6 +585,8 @@ export class Interpreter {
543585
}
544586
})
545587
);
588+
case "join":
589+
return new StringValue(operand.value.map((x) => x.value).join(""));
546590
default:
547591
throw new Error(`Unknown ArrayValue filter: ${filter.value}`);
548592
}
@@ -570,6 +614,7 @@ export class Interpreter {
570614
)
571615
.join("\n")
572616
);
617+
case "join":
573618
case "string":
574619
return operand; // no-op
575620
default:
@@ -610,6 +655,24 @@ export class Interpreter {
610655
throw new Error("If set, indent must be a number");
611656
}
612657
return new StringValue(toJSON(operand, indent.value));
658+
} else if (filterName === "join") {
659+
let value;
660+
if (operand instanceof StringValue) {
661+
// NOTE: string.split('') breaks for unicode characters
662+
value = Array.from(operand.value);
663+
} else if (operand instanceof ArrayValue) {
664+
value = operand.value.map((x) => x.value);
665+
} else {
666+
throw new Error(`Cannot apply filter "${filterName}" to type: ${operand.type}`);
667+
}
668+
const [args, kwargs] = this.evaluateArguments(filter.args, environment);
669+
670+
const separator = args.at(0) ?? kwargs.get("separator") ?? new StringValue("");
671+
if (!(separator instanceof StringValue)) {
672+
throw new Error("separator must be a string");
673+
}
674+
675+
return new StringValue(value.join(separator.value));
613676
}
614677

615678
if (operand instanceof ArrayValue) {

packages/jinja/test/e2e.test.js

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)