Skip to content

Commit 125cd11

Browse files
committed
jsonschema: pre-compile regexps
This is the first CL that deals with the process of preparing a schema for validation. Perform some basic checks on the schema. Along the way, compile regexps and store them in the schema for use during validation. Change-Id: I0e5e6ce28656dcecb6d2d4b2fdc98998fa05b6f1 Reviewed-on: https://go-review.googlesource.com/c/tools/+/669696 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Alan Donovan <[email protected]>
1 parent 2f18550 commit 125cd11

File tree

8 files changed

+153
-34
lines changed

8 files changed

+153
-34
lines changed

internal/mcp/internal/jsonschema/infer_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"testing"
99

1010
"github.com/google/go-cmp/cmp"
11+
"github.com/google/go-cmp/cmp/cmpopts"
1112
"golang.org/x/tools/internal/mcp/internal/jsonschema"
1213
)
1314

@@ -63,7 +64,7 @@ func TestForType(t *testing.T) {
6364

6465
for _, test := range tests {
6566
t.Run(test.name, func(t *testing.T) {
66-
if diff := cmp.Diff(test.want, test.got); diff != "" {
67+
if diff := cmp.Diff(test.want, test.got, cmpopts.IgnoreUnexported(jsonschema.Schema{})); diff != "" {
6768
t.Errorf("ForType mismatch (-want +got):\n%s", diff)
6869
}
6970
})
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// This file deals with preparing a schema for validation, including various checks,
6+
// optimizations, and the resolution of cross-schema references.
7+
8+
package jsonschema
9+
10+
import (
11+
"errors"
12+
"fmt"
13+
"regexp"
14+
)
15+
16+
// A Resolved consists of a [Schema] along with associated information needed to
17+
// validate documents against it.
18+
// A Resolved has been validated against its meta-schema, and all its references
19+
// (the $ref and $dynamicRef keywords) have been resolved to their referenced Schemas.
20+
// Call [Schema.Resolve] to obtain a Resolved from a Schema.
21+
type Resolved struct {
22+
root *Schema
23+
}
24+
25+
// Resolve resolves all references within the schema and performs other tasks that
26+
// prepare the schema for validation.
27+
func (root *Schema) Resolve() (*Resolved, error) {
28+
// There are three steps involved in preparing a schema to validate.
29+
// 1. Check: validate the schema against a meta-schema, and perform other well-formedness
30+
// checks. Precompute some values along the way.
31+
// 2. Resolve URIs: TODO.
32+
// 3. Resolve references: TODO.
33+
if err := root.check(); err != nil {
34+
return nil, err
35+
}
36+
return &Resolved{root: root}, nil
37+
}
38+
39+
func (s *Schema) check() error {
40+
if s == nil {
41+
return errors.New("nil schema")
42+
}
43+
var errs []error
44+
report := func(err error) { errs = append(errs, err) }
45+
46+
for ss := range s.all() {
47+
ss.checkLocal(report)
48+
}
49+
return errors.Join(errs...)
50+
}
51+
52+
// checkLocal checks s for validity, independently of other schemas it may refer to.
53+
// Since checking a regexp involves compiling it, checkLocal saves those compiled regexps
54+
// in the schema for later use.
55+
// It appends the errors it finds to errs.
56+
func (s *Schema) checkLocal(report func(error)) {
57+
addf := func(format string, args ...any) {
58+
report(fmt.Errorf("jsonschema.Schema: "+format, args...))
59+
}
60+
61+
if s == nil {
62+
addf("nil subschema")
63+
return
64+
}
65+
if err := s.basicChecks(); err != nil {
66+
report(err)
67+
return
68+
}
69+
70+
// TODO: validate the schema's properties,
71+
// ideally by jsonschema-validating it against the meta-schema.
72+
73+
// Check and compile regexps.
74+
if s.Pattern != "" {
75+
re, err := regexp.Compile(s.Pattern)
76+
if err != nil {
77+
addf("pattern: %w", err)
78+
} else {
79+
s.pattern = re
80+
}
81+
}
82+
if len(s.PatternProperties) > 0 {
83+
s.patternProperties = map[*regexp.Regexp]*Schema{}
84+
for reString, subschema := range s.PatternProperties {
85+
re, err := regexp.Compile(reString)
86+
if err != nil {
87+
addf("patternProperties[%q]: %w", reString, err)
88+
continue
89+
}
90+
s.patternProperties[re] = subschema
91+
}
92+
}
93+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package jsonschema
6+
7+
import (
8+
"regexp"
9+
"testing"
10+
)
11+
12+
func TestCheckLocal(t *testing.T) {
13+
for _, tt := range []struct {
14+
s *Schema
15+
want string // error must be non-nil and match this regexp
16+
}{
17+
{nil, "nil"},
18+
{
19+
&Schema{Pattern: "]["},
20+
"regexp",
21+
},
22+
{
23+
&Schema{PatternProperties: map[string]*Schema{"*": nil}},
24+
"regexp",
25+
},
26+
} {
27+
_, err := tt.s.Resolve()
28+
if err == nil {
29+
t.Errorf("%s: unexpectedly passed", tt.s.json())
30+
continue
31+
}
32+
if !regexp.MustCompile(tt.want).MatchString(err.Error()) {
33+
t.Errorf("%s: did not match\nerror: %s\nregexp: %s",
34+
tt.s.json(), err, tt.want)
35+
}
36+
}
37+
}

internal/mcp/internal/jsonschema/schema.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"fmt"
1414
"iter"
1515
"math"
16+
"regexp"
1617
)
1718

1819
// A Schema is a JSON schema object.
@@ -106,6 +107,10 @@ type Schema struct {
106107
Then *Schema `json:"then,omitempty"`
107108
Else *Schema `json:"else,omitempty"`
108109
DependentSchemas map[string]*Schema `json:"dependentSchemas,omitempty"`
110+
111+
// computed fields
112+
pattern *regexp.Regexp
113+
patternProperties map[*regexp.Regexp]*Schema
109114
}
110115

111116
// String returns a short description of the schema.

internal/mcp/internal/jsonschema/validate.go

Lines changed: 8 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
"math"
1111
"math/big"
1212
"reflect"
13-
"regexp"
1413
"slices"
1514
"strings"
1615
"unicode/utf8"
@@ -19,16 +18,9 @@ import (
1918
// The value of the "$schema" keyword for the version that we can validate.
2019
const draft202012 = "https://json-schema.org/draft/2020-12/schema"
2120

22-
// Temporary definition of ResolvedSchema.
23-
// The full definition deals with references between schemas, specifically the $id, $anchor and $ref keywords.
24-
// We'll ignore that for now.
25-
type ResolvedSchema struct {
26-
root *Schema
27-
}
28-
2921
// Validate validates the instance, which must be a JSON value, against the schema.
3022
// It returns nil if validation is successful or an error if it is not.
31-
func (rs *ResolvedSchema) Validate(instance any) error {
23+
func (rs *Resolved) Validate(instance any) error {
3224
if s := rs.root.Schema; s != "" && s != draft202012 {
3325
return fmt.Errorf("cannot validate version %s, only %s", s, draft202012)
3426
}
@@ -39,7 +31,7 @@ func (rs *ResolvedSchema) Validate(instance any) error {
3931

4032
// state is the state of a single call to Resolved.Validate.
4133
type state struct {
42-
rs *ResolvedSchema
34+
rs *Resolved
4335
depth int
4436
}
4537

@@ -60,10 +52,8 @@ func (st *state) validate(instance reflect.Value, schema *Schema, callerAnns *an
6052
return fmt.Errorf("max recursion depth of %d reached", st.depth)
6153
}
6254

63-
// Treat the nil schema like the empty schema, as accepting everything.
64-
if schema == nil {
65-
return nil
66-
}
55+
// We checked for nil schemas in [Schema.Resolve].
56+
assert(schema != nil, "nil schema")
6757

6858
// Step through interfaces.
6959
if instance.IsValid() && instance.Kind() == reflect.Interface {
@@ -156,15 +146,8 @@ func (st *state) validate(instance reflect.Value, schema *Schema, callerAnns *an
156146
}
157147
}
158148

159-
if schema.Pattern != "" {
160-
// TODO(jba): compile regexps during schema validation.
161-
m, err := regexp.MatchString(schema.Pattern, str)
162-
if err != nil {
163-
return err
164-
}
165-
if !m {
166-
return fmt.Errorf("pattern: %q does not match pattern %q", str, schema.Pattern)
167-
}
149+
if schema.Pattern != "" && !schema.pattern.MatchString(str) {
150+
return fmt.Errorf("pattern: %q does not match regular expression %q", str, schema.Pattern)
168151
}
169152
}
170153

@@ -364,13 +347,8 @@ func (st *state) validate(instance reflect.Value, schema *Schema, callerAnns *an
364347
for vprop, val := range instance.Seq2() {
365348
prop := vprop.String()
366349
// Check every matching pattern.
367-
for pattern, schema := range schema.PatternProperties {
368-
// TODO(jba): pre-compile regexps
369-
m, err := regexp.MatchString(pattern, prop)
370-
if err != nil {
371-
return err
372-
}
373-
if m {
350+
for re, schema := range schema.patternProperties {
351+
if re.MatchString(prop) {
374352
if err := st.validate(val, schema, nil, append(path, prop)); err != nil {
375353
return err
376354
}

internal/mcp/internal/jsonschema/validate_test.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@ func TestValidate(t *testing.T) {
5151
}
5252
for _, g := range groups {
5353
t.Run(g.Description, func(t *testing.T) {
54-
rs := &ResolvedSchema{root: g.Schema}
54+
rs, err := g.Schema.Resolve()
55+
if err != nil {
56+
t.Fatal(err)
57+
}
5558
for s := range g.Schema.all() {
5659
if s.Defs != nil || s.Ref != "" {
5760
t.Skip("schema or subschema has unimplemented keywords")

internal/mcp/mcp_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"time"
1616

1717
"github.com/google/go-cmp/cmp"
18+
"github.com/google/go-cmp/cmp/cmpopts"
1819
"golang.org/x/tools/internal/mcp/internal/jsonschema"
1920
"golang.org/x/tools/internal/mcp/internal/protocol"
2021
)
@@ -149,7 +150,7 @@ func TestEndToEnd(t *testing.T) {
149150
AdditionalProperties: falseSchema,
150151
},
151152
}}
152-
if diff := cmp.Diff(wantTools, gotTools); diff != "" {
153+
if diff := cmp.Diff(wantTools, gotTools, cmpopts.IgnoreUnexported(jsonschema.Schema{})); diff != "" {
153154
t.Fatalf("tools/list mismatch (-want +got):\n%s", diff)
154155
}
155156

internal/mcp/tool_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"testing"
1010

1111
"github.com/google/go-cmp/cmp"
12+
"github.com/google/go-cmp/cmp/cmpopts"
1213
"golang.org/x/tools/internal/mcp"
1314
"golang.org/x/tools/internal/mcp/internal/jsonschema"
1415
)
@@ -82,7 +83,7 @@ func TestMakeTool(t *testing.T) {
8283
},
8384
}
8485
for _, test := range tests {
85-
if diff := cmp.Diff(test.want, test.tool.Definition.InputSchema); diff != "" {
86+
if diff := cmp.Diff(test.want, test.tool.Definition.InputSchema, cmpopts.IgnoreUnexported(jsonschema.Schema{})); diff != "" {
8687
t.Errorf("MakeTool(%v) mismatch (-want +got):\n%s", test.tool.Definition.Name, diff)
8788
}
8889
}

0 commit comments

Comments
 (0)