Skip to content

Commit 3daa5bc

Browse files
authored
Add GraalJS regular expression and factory implementation (#1058)
1 parent b063972 commit 3daa5bc

23 files changed

+954
-18
lines changed

README.md

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -157,14 +157,26 @@ The following are the optional dependencies that may be required for certain opt
157157
These are not automatically included and setting the relevant option without adding the library will result in a `ClassNotFoundException`.
158158

159159
```xml
160-
<!-- This is required when setting setEcma262Validator(true) -->
160+
<!-- Either library is required when calling setEcma262Validator(true) or when explicitly setting the corresponding factory via setRegularExpressionFactory() -->
161161
<dependency>
162162
<!-- Used to validate ECMA 262 regular expressions -->
163+
<!-- Approximately 2 MB in dependencies -->
164+
<!-- JoniRegularExpressionFactory -->
163165
<groupId>org.jruby.joni</groupId>
164166
<artifactId>joni</artifactId>
165167
<version>${version.joni}</version>
166168
<optional>true</optional>
167169
</dependency>
170+
171+
<dependency>
172+
<!-- Used to validate ECMA 262 regular expressions -->
173+
<!-- Approximately 50 MB in dependencies -->
174+
<!-- GraalJSRegularExpressionFactory -->
175+
<groupId>org.graalvm.js</groupId>
176+
<artifactId>js</artifactId>
177+
<version>${version.graaljs}</version>
178+
<optional>true</optional>
179+
</dependency>
168180
```
169181

170182
##### Excludable Dependencies
@@ -258,7 +270,7 @@ SchemaValidatorsConfig config = new SchemaValidatorsConfig();
258270
// By default JSON Path is used for reporting the instance location and evaluation path
259271
config.setPathType(PathType.JSON_POINTER);
260272
// By default the JDK regular expression implementation which is not ECMA 262 compliant is used
261-
// Note that setting this to true requires including the optional joni dependency
273+
// Note that setting this to true requires including the optional joni or graaljs dependency
262274
// config.setEcma262Validator(true);
263275

264276
// Due to the mapping the schema will be retrieved from the classpath at classpath:schema/example-main.json.
@@ -293,7 +305,7 @@ SchemaValidatorsConfig config = new SchemaValidatorsConfig();
293305
// By default JSON Path is used for reporting the instance location and evaluation path
294306
config.setPathType(PathType.JSON_POINTER);
295307
// By default the JDK regular expression implementation which is not ECMA 262 compliant is used
296-
// Note that setting this to true requires including the optional joni dependency
308+
// Note that setting this to true requires including the optional joni or graaljs dependency
297309
// config.setEcma262Validator(true);
298310

299311
// Due to the mapping the meta-schema will be retrieved from the classpath at classpath:draft/2020-12/schema.
@@ -517,7 +529,7 @@ The following is sample output from the Hierarchical format.
517529
| Name | Description | Default Value
518530
|---------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------
519531
| `pathType` | The path type to use for reporting the instance location and evaluation path. Set to `PathType.JSON_POINTER` to use JSON Pointer. | `PathType.DEFAULT`
520-
| `ecma262Validator` | Whether to use the ECMA 262 `joni` library to validate the `pattern` keyword. This requires the dependency to be manually added to the project or a `ClassNotFoundException` will be thrown. | `false`
532+
| `ecma262Validator` | Whether to use the ECMA 262 `joni` or `graaljs` library to validate the `pattern` keyword. This requires the dependency to be manually added to the project or a `ClassNotFoundException` will be thrown. | `false`
521533
| `executionContextCustomizer` | This can be used to customize the `ExecutionContext` generated by the `JsonSchema` for each validation run. | `null`
522534
| `schemaIdValidator` | This is used to customize how the `$id` values are validated. Note that the default implementation allows non-empty fragments where no base IRI is specified and also allows non-absolute IRI `$id` values in the root schema. | `JsonSchemaIdValidator.DEFAULT`
523535
| `messageSource` | This is used to retrieve the locale specific messages. | `DefaultMessageSource.getInstance()`
@@ -527,6 +539,7 @@ The following is sample output from the Hierarchical format.
527539
| `locale` | The locale to use for generating messages in the `ValidationMessage`. | `Locale.getDefault()`
528540
| `failFast` | Whether to return failure immediately when an assertion is generated. | `false`
529541
| `formatAssertionsEnabled` | The default is to generate format assertions from Draft 4 to Draft 7 and to only generate annotations from Draft 2019-09. Setting to `true` or `false` will override the default behavior. | `null`
542+
| `regularExpressionFactory` | The factory to use to create regular expressions, for instance `JoniRegularExpressionFactory` or `GraalJSRegularExpressionFactory`. | `JDKRegularExpressionFactory.getInstance()`
530543

531544
## Performance Considerations
532545

pom.xml

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,9 @@
7575
<version.itu>1.10.2</version.itu>
7676
<version.jackson>2.17.0</version.jackson>
7777
<version.joni>2.2.1</version.joni>
78-
<version.logback>1.3.14</version.logback>
78+
<version.logback>1.3.14</version.logback> <!-- 1.4.x and above is not Java 8 compatible -->
7979
<version.slf4j>2.0.13</version.slf4j>
80+
<version.graaljs>21.3.10</version.graaljs> <!-- 22.x and above is not Java 8 compatible -->
8081

8182
<version.hamcrest>2.2</version.hamcrest>
8283
<version.junit>5.10.2</version.junit>
@@ -138,12 +139,31 @@
138139

139140
<dependency>
140141
<!-- Used to validate ECMA 262 regular expressions -->
142+
<!-- Approximately 2 MB in dependencies -->
141143
<groupId>org.jruby.joni</groupId>
142144
<artifactId>joni</artifactId>
143145
<version>${version.joni}</version>
144146
<optional>true</optional>
145147
</dependency>
146148

149+
<dependency>
150+
<!-- Used to validate ECMA 262 regular expressions -->
151+
<!-- Approximately 50 MB in dependencies -->
152+
<groupId>org.graalvm.js</groupId>
153+
<artifactId>js</artifactId>
154+
<version>${version.graaljs}</version>
155+
<optional>true</optional>
156+
</dependency>
157+
158+
<dependency>
159+
<!-- Used to validate ECMA 262 regular expressions -->
160+
<!-- Approximately 50 MB in dependencies -->
161+
<groupId>org.graalvm.sdk</groupId>
162+
<artifactId>graal-sdk</artifactId>
163+
<version>${version.graaljs}</version>
164+
<optional>true</optional>
165+
</dependency>
166+
147167
<dependency>
148168
<groupId>org.junit.jupiter</groupId>
149169
<artifactId>junit-jupiter-engine</artifactId>

src/main/java/com/networknt/schema/SchemaValidatorsConfig.java

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
import com.fasterxml.jackson.databind.JsonNode;
2020
import com.networknt.schema.i18n.DefaultMessageSource;
2121
import com.networknt.schema.i18n.MessageSource;
22+
import com.networknt.schema.regex.ECMAScriptRegularExpressionFactory;
23+
import com.networknt.schema.regex.JDKRegularExpressionFactory;
24+
import com.networknt.schema.regex.RegularExpressionFactory;
2225
import com.networknt.schema.walk.DefaultItemWalkListenerRunner;
2326
import com.networknt.schema.walk.DefaultKeywordWalkListenerRunner;
2427
import com.networknt.schema.walk.DefaultPropertyWalkListenerRunner;
@@ -59,9 +62,9 @@ public class SchemaValidatorsConfig {
5962
private ApplyDefaultsStrategy applyDefaultsStrategy = ApplyDefaultsStrategy.EMPTY_APPLY_DEFAULTS_STRATEGY;
6063

6164
/**
62-
* When set to true, use ECMA-262 compatible validator
65+
* Used to create {@link com.networknt.schema.regex.RegularExpression}.
6366
*/
64-
private boolean ecma262Validator;
67+
private RegularExpressionFactory regularExpressionFactory = JDKRegularExpressionFactory.getInstance();
6568

6669
/**
6770
* When set to true, use Java-specific semantics rather than native JavaScript
@@ -268,12 +271,55 @@ public void setHandleNullableField(boolean handleNullableField) {
268271
this.handleNullableField = handleNullableField;
269272
}
270273

274+
/**
275+
* Gets whether to use an ECMA-262 compliant regular expression validator.
276+
* <p>
277+
* This defaults to false, and setting it to true requires inclusion of the optional
278+
* org.jruby.joni:joni or org.graalvm.js:js dependencies.
279+
*
280+
* @return true if ECMA-262 compliant
281+
*/
271282
public boolean isEcma262Validator() {
272-
return this.ecma262Validator;
283+
return !(this.regularExpressionFactory instanceof JDKRegularExpressionFactory);
273284
}
274285

286+
/**
287+
* Sets whether to use an ECMA-262 compliant regular expression validator.
288+
* <p>
289+
* This defaults to false, and setting it to true requires inclusion of the optional
290+
* org.jruby.joni:joni or org.graalvm.js:js dependencies.
291+
*
292+
* @param ecma262Validator true if ECMA-262 compliant
293+
*/
275294
public void setEcma262Validator(boolean ecma262Validator) {
276-
this.ecma262Validator = ecma262Validator;
295+
this.regularExpressionFactory = ecma262Validator ? ECMAScriptRegularExpressionFactory.getInstance()
296+
: JDKRegularExpressionFactory.getInstance();
297+
}
298+
299+
/**
300+
* Gets the regular expression factory.
301+
* <p>
302+
* This defaults to the JDKRegularExpressionFactory; the ECMAScript implementations
303+
* require inclusion of optional org.jruby.joni:joni or org.graalvm.js:js dependencies.
304+
*
305+
* @return the factory
306+
*/
307+
public RegularExpressionFactory getRegularExpressionFactory() {
308+
return regularExpressionFactory;
309+
}
310+
311+
/**
312+
* Sets the regular expression factory.
313+
* <p>
314+
* This defaults to the JDKRegularExpressionFactory; the ECMAScript implementations
315+
* require inclusion of optional org.jruby.joni:joni or org.graalvm.js:js dependencies.
316+
*
317+
* @see JDKRegularExpressionFactory
318+
* @see ECMAScriptRegularExpressionFactory
319+
* @param regularExpressionFactory the factory
320+
*/
321+
public void setRegularExpressionFactory(RegularExpressionFactory regularExpressionFactory) {
322+
this.regularExpressionFactory = regularExpressionFactory;
277323
}
278324

279325
public boolean isJavaSemantics() {
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Copyright (c) 2024 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.networknt.schema.regex;
17+
18+
import com.networknt.schema.utils.Classes;
19+
20+
/**
21+
* ECMAScript regular expression factory that chooses between GraalJS or Joni
22+
* implementations depending on which is on the classpath.
23+
*/
24+
public class ECMAScriptRegularExpressionFactory implements RegularExpressionFactory {
25+
private static final boolean JONI_PRESENT = Classes.isPresent("org.joni.Regex",
26+
ECMAScriptRegularExpressionFactory.class.getClassLoader());
27+
private static final boolean GRAALJS_PRESENT = Classes.isPresent("com.oracle.truffle.js.parser.GraalJSEvaluator",
28+
ECMAScriptRegularExpressionFactory.class.getClassLoader());
29+
30+
private static final RegularExpressionFactory DELEGATE = GRAALJS_PRESENT
31+
? GraalJSRegularExpressionFactory.getInstance()
32+
: JoniRegularExpressionFactory.getInstance();
33+
34+
public static final ECMAScriptRegularExpressionFactory INSTANCE = new ECMAScriptRegularExpressionFactory();
35+
36+
public static ECMAScriptRegularExpressionFactory getInstance() {
37+
if (!JONI_PRESENT && !GRAALJS_PRESENT) {
38+
throw new IllegalArgumentException(
39+
"Either org.jruby.joni:joni or org.graalvm.js:js needs to be present in the classpath");
40+
}
41+
return INSTANCE;
42+
}
43+
44+
@Override
45+
public RegularExpression getRegularExpression(String regex) {
46+
return DELEGATE.getRegularExpression(regex);
47+
}
48+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* Copyright (c) 2024 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.networknt.schema.regex;
17+
18+
import org.graalvm.polyglot.Context;
19+
20+
/**
21+
* Factory for the js {@link Context}.
22+
*/
23+
public class GraalJSContextFactory {
24+
/**
25+
* The holder defers the classloading until it is used.
26+
*/
27+
private static class Holder {
28+
private static final Context INSTANCE = Context.newBuilder("js").option("engine.WarnInterpreterOnly", "false")
29+
.build();
30+
}
31+
32+
/**
33+
* Gets the singleton instance of the Context.
34+
* <p>
35+
* This may need to be closed to release resources if no longer needed.
36+
*
37+
* @return the Context
38+
*/
39+
public static Context getInstance() {
40+
return Holder.INSTANCE;
41+
}
42+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Copyright (c) 2024 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.networknt.schema.regex;
17+
18+
import org.graalvm.polyglot.Value;
19+
20+
/**
21+
* GraalJS {@link RegularExpression}.
22+
* <p>
23+
* This requires a dependency on org.graalvm.js:js which along with its
24+
* dependency libraries is approximately 50 MB in size.
25+
*/
26+
class GraalJSRegularExpression implements RegularExpression {
27+
private final GraalJSRegularExpressionContext context;
28+
private final Value function;
29+
30+
GraalJSRegularExpression(String regex, GraalJSRegularExpressionContext context) {
31+
this.context = context;
32+
synchronized(context.getContext()) {
33+
this.function = context.getRegExpBuilder().execute(regex);
34+
}
35+
}
36+
37+
@Override
38+
public boolean matches(String value) {
39+
synchronized(context.getContext()) {
40+
return !function.execute(value).isNull();
41+
}
42+
}
43+
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* Copyright (c) 2024 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.networknt.schema.regex;
17+
18+
import org.graalvm.polyglot.Context;
19+
import org.graalvm.polyglot.Value;
20+
21+
/**
22+
* Holds a GraalJS {@link Context} and the function evaluated in it that builds regular expression matchers.
23+
*/
24+
public class GraalJSRegularExpressionContext {
25+
private static final String SOURCE = "pattern => {\n"
26+
+ " const regex = new RegExp(pattern, 'u');\n"
27+
+ " return text => text.match(regex)\n"
28+
+ "};";
29+
30+
private final Context context;
31+
private final Value regExpBuilder;
32+
33+
/**
34+
* Constructor.
35+
* <p>
36+
* It is the caller's responsibility to release the context when it is no longer
37+
* required.
38+
*
39+
* @param context the context
40+
*/
41+
public GraalJSRegularExpressionContext(Context context) {
42+
this.context = context;
43+
synchronized(this.context) {
44+
this.regExpBuilder = this.context.eval("js", SOURCE);
45+
}
46+
}
47+
48+
/**
49+
* Operations must synchronize on the {@link Context} as only a single thread
50+
* can access the {@link Context} and {@link #getRegExpBuilder()} at one time.
51+
*
52+
* @return the context
53+
*/
54+
public Context getContext() {
55+
return context;
56+
}
57+
58+
/**
59+
* Gets the RegExp builder.
60+
*
61+
* @return the regexp builder
62+
*/
63+
public Value getRegExpBuilder() {
64+
return regExpBuilder;
65+
}
66+
}

0 commit comments

Comments
 (0)