Skip to content

Commit 11c0df2

Browse files
authored
Detect invalid escape for ecmascript (#1049)
1 parent bc01ec4 commit 11c0df2

File tree

8 files changed

+166
-16
lines changed

8 files changed

+166
-16
lines changed

src/main/java/com/networknt/schema/format/RegexFormat.java

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,24 +13,22 @@
1313
*/
1414
package com.networknt.schema.format;
1515

16-
import java.util.regex.Pattern;
17-
import java.util.regex.PatternSyntaxException;
18-
1916
import com.networknt.schema.ExecutionContext;
2017
import com.networknt.schema.Format;
18+
import com.networknt.schema.ValidationContext;
19+
import com.networknt.schema.regex.RegularExpression;
2120

2221
/**
2322
* Format for regex.
2423
*/
2524
public class RegexFormat implements Format {
2625
@Override
27-
public boolean matches(ExecutionContext executionContext, String value) {
26+
public boolean matches(ExecutionContext executionContext, ValidationContext validationContext, String value) {
2827
if (null == value) return true;
2928
try {
30-
Pattern.compile(value);
29+
RegularExpression.compile(value, validationContext);
3130
return true;
32-
33-
} catch (PatternSyntaxException e) {
31+
} catch (RuntimeException e) {
3432
return false;
3533
}
3634
}

src/main/java/com/networknt/schema/regex/JDKRegularExpression.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
import java.util.regex.Pattern;
44

5+
/**
6+
* JDK {@link RegularExpression}.
7+
*/
58
class JDKRegularExpression implements RegularExpression {
69
private final Pattern pattern;
710

src/main/java/com/networknt/schema/regex/JoniRegularExpression.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,28 @@
11
package com.networknt.schema.regex;
22

33
import java.nio.charset.StandardCharsets;
4+
import java.util.regex.Pattern;
45

56
import org.jcodings.specific.UTF8Encoding;
67
import org.joni.Option;
78
import org.joni.Regex;
89
import org.joni.Syntax;
10+
import org.joni.exception.SyntaxException;
911

12+
/**
13+
* ECMAScript {@link RegularExpression}.
14+
*/
1015
class JoniRegularExpression implements RegularExpression {
1116
private final Regex pattern;
17+
private final Pattern INVALID_ESCAPE_PATTERN = Pattern.compile(
18+
".*\\\\([aeg-moqyzACE-OQ-RT-VX-Z1-9]|c$|[pP]([^{]|$)|u([^{0-9]|$)|x([0-9a-fA-F][^0-9a-fA-F]|[^0-9a-fA-F][0-9a-fA-F]|[^0-9a-fA-F][^0-9a-fA-F]|.?$)).*");
1219

1320
JoniRegularExpression(String regex) {
21+
this(regex, Syntax.ECMAScript);
22+
}
23+
24+
JoniRegularExpression(String regex, Syntax syntax) {
25+
validate(regex);
1426
// Joni is too liberal on some constructs
1527
String s = regex
1628
.replace("\\d", "[0-9]")
@@ -21,7 +33,19 @@ class JoniRegularExpression implements RegularExpression {
2133
.replace("\\S", "[^ \\f\\n\\r\\t\\v\\u00a0\\u1680\\u2000-\\u200a\\u2028\\u2029\\u202f\\u205f\\u3000\\ufeff]");
2234

2335
byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
24-
this.pattern = new Regex(bytes, 0, bytes.length, Option.SINGLELINE, UTF8Encoding.INSTANCE, Syntax.ECMAScript);
36+
this.pattern = new Regex(bytes, 0, bytes.length, Option.SINGLELINE, UTF8Encoding.INSTANCE, syntax);
37+
}
38+
39+
protected void validate(String regex) {
40+
// Joni is not strict with escapes
41+
if (INVALID_ESCAPE_PATTERN.matcher(regex).matches()) {
42+
/*
43+
* One option considered was a custom Encoding implementation that rejects
44+
* certain code points but it is unable to distinguish \a vs \cG for instance as
45+
* both translate to BEL
46+
*/
47+
throw new SyntaxException("Invalid escape");
48+
}
2549
}
2650

2751
@Override

src/main/java/com/networknt/schema/regex/RegularExpression.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
import com.networknt.schema.ValidationContext;
44

5+
/**
6+
* Regular expression.
7+
*/
58
@FunctionalInterface
69
public interface RegularExpression {
710
boolean matches(String value);

src/test/java/com/networknt/schema/AbstractJsonSchemaTestSuite.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ private static String toForwardSlashPath(Path file) {
5555

5656
private static void executeTest(JsonSchema schema, TestSpec testSpec) {
5757
Set<ValidationMessage> errors = schema.validate(testSpec.getData(), OutputFormat.DEFAULT, (executionContext, validationContext) -> {
58-
if (testSpec.getTestCase().getSource().getPath().getParent().toString().endsWith("format")) {
58+
if (testSpec.getTestCase().getSource().getPath().getParent().toString().endsWith("format")
59+
|| "ecmascript-regex.json"
60+
.equals(testSpec.getTestCase().getSource().getPath().getFileName().toString())) {
5961
executionContext.getExecutionConfig().setFormatAssertionsEnabled(true);
6062
}
6163
});
src/test/java/com/networknt/schema/regex/JoniRegularExpressionTest.java

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/*
2+
* Copyright (c) 2024 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.networknt.schema.regex;
17+
18+
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
19+
import static org.junit.jupiter.api.Assertions.assertEquals;
20+
import static org.junit.jupiter.api.Assertions.assertThrows;
21+
22+
import org.joni.exception.SyntaxException;
23+
import org.junit.jupiter.api.Test;
24+
import org.junit.jupiter.params.ParameterizedTest;
25+
import org.junit.jupiter.params.provider.EnumSource;
26+
27+
/**
28+
* Tests for JoniRegularExpression.
29+
*/
30+
class JoniRegularExpressionTest {
31+
32+
enum InvalidEscapeInput {
33+
A("\\a"),
34+
HELLOA("hello\\a"),
35+
C("\\c"),
36+
E("\\e"),
37+
G("\\g"),
38+
H("\\h"),
39+
I("\\i"),
40+
J("\\j"),
41+
K("\\k"),
42+
L("\\l"),
43+
M("\\m"),
44+
O("\\o"),
45+
Q("\\q"),
46+
U("\\u"),
47+
X("\\x"),
48+
X1("\\x1"),
49+
XGG("\\xgg"),
50+
X1G("\\x1g"),
51+
Y("\\y"),
52+
Z("\\z"),
53+
_1("\\1"),
54+
_2("\\2"),
55+
_3("\\3"),
56+
_4("\\4"),
57+
_5("\\5"),
58+
_6("\\6"),
59+
_7("\\7"),
60+
_8("\\8"),
61+
_9("\\9");
62+
63+
String value;
64+
65+
InvalidEscapeInput(String value) {
66+
this.value = value;
67+
}
68+
}
69+
70+
@ParameterizedTest
71+
@EnumSource(InvalidEscapeInput.class)
72+
void invalidEscape(InvalidEscapeInput input) {
73+
SyntaxException e = assertThrows(SyntaxException.class, () -> new JoniRegularExpression(input.value));
74+
assertEquals("Invalid escape", e.getMessage());
75+
}
76+
77+
enum ValidEscapeInput {
78+
B("\\b"),
79+
D("\\d"),
80+
CAP_D("\\D"),
81+
W("\\w"),
82+
CAP_W("\\W"),
83+
S("\\s"),
84+
CAP_S("\\S"),
85+
T("\\t"),
86+
U1234("\\u1234"),
87+
R("\\r"),
88+
N("\\n"),
89+
V("\\v"),
90+
F("\\f"),
91+
X12("\\x12"),
92+
X1F("\\x1f"),
93+
X1234("\\x1234"),
94+
P("\\p{Letter}cole"), // unicode property
95+
CAP_P("\\P{Letter}cole"), // unicode property
96+
_0("\\0"),
97+
CA("\\cA"), // control
98+
CB("\\cB"), // control
99+
CC("\\cC"), // control
100+
CG("\\cG"); // control
101+
102+
String value;
103+
104+
ValidEscapeInput(String value) {
105+
this.value = value;
106+
}
107+
}
108+
109+
@ParameterizedTest
110+
@EnumSource(ValidEscapeInput.class)
111+
void validEscape(ValidEscapeInput input) {
112+
assertDoesNotThrow(() -> new JoniRegularExpression(input.value));
113+
}
114+
115+
@Test
116+
void invalidPropertyName() {
117+
assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\p"));
118+
assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\P"));
119+
assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\pa"));
120+
assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\Pa"));
121+
}
122+
}

src/test/suite/tests/draft-next/optional/ecmascript-regex.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -409,12 +409,12 @@
409409
"description": "\\a is not an ECMA 262 control escape",
410410
"schema": {
411411
"$schema": "https://json-schema.org/draft/next/schema",
412-
"$ref": "https://json-schema.org/draft/next/schema"
412+
"format": "regex"
413413
},
414414
"tests": [
415415
{
416416
"description": "when used as a pattern",
417-
"data": { "pattern": "\\a" },
417+
"data": "\\a",
418418
"valid": false
419419
}
420420
]

src/test/suite/tests/draft2020-12/optional/ecmascript-regex.json

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -409,15 +409,13 @@
409409
"description": "\\a is not an ECMA 262 control escape",
410410
"schema": {
411411
"$schema": "https://json-schema.org/draft/2020-12/schema",
412-
"$ref": "https://json-schema.org/draft/2020-12/schema"
412+
"format": "regex"
413413
},
414414
"tests": [
415415
{
416416
"description": "when used as a pattern",
417-
"data": { "pattern": "\\a" },
418-
"valid": false,
419-
"disabled": true,
420-
"reason": "TODO: RegexFormat does not support ECMA 262 regular expressions"
417+
"data": "\\a",
418+
"valid": false
421419
}
422420
]
423421
},

0 commit comments

Comments
 (0)